def addToTotalSize(path, total_size):
    """
    Return total_size increased by the size of the file at 'path'.

    path:       location of the in/output file to be measured
    total_size: running total (in bytes) of all files checked so far

    A file that is not present is logged and skipped, in which case total_size
    is returned unchanged. The size is obtained from SiteMover.getLocalFileSize();
    if that call gives back an empty string, nothing is added either.
    """

    # guard clause: there is nothing to add for a file that does not exist
    if not os.path.exists(path):
        tolog("Skipping file %s in work dir size check since it is not present" % (path))
        return total_size

    from SiteMover import SiteMover

    # ask the site mover for the file size
    file_size = SiteMover().getLocalFileSize(path)
    tolog("Size of file %s: %s B" % (path, file_size))

    # the size is converted with long() before it is added to the total
    if file_size != "":
        total_size += long(file_size)

    return total_size
def prepareOutFiles(outFiles, logFile, workdir):
    """
    Verify and prepare the output files for transfer.

    outFiles: names of the expected output files, relative to workdir; empty
              entries and the string 'NULL' are ignored
    logFile:  not used by the part of the function visible here
    workdir:  directory in which the output files are expected

    Each verified file name is collected in 'outs' and its modification time
    (from SiteMover.getModTime()) in 'modt'. The first missing file stops the
    scan and sets 'ec' to ERR_MISSINGOUTPUTFILE and 'pilotErrorDiag' to the
    corresponding message.
    """

    ec = 0
    pilotErrorDiag = ""
    outs = []
    modt = []

    from SiteMover import SiteMover

    for outf in outFiles:
        # skip empty strings and NULL entries
        if not outf or outf == 'NULL':
            continue

        if not os.path.isfile("%s/%s" % (workdir, outf)):
            # expected output file is missing: give up on the remaining files
            pilotErrorDiag = "Expected output file %s does not exist" % (outf)
            tolog("!!FAILED!!3000!! %s" % (pilotErrorDiag))
            error = PilotErrors()
            ec = error.ERR_MISSINGOUTPUTFILE
            break

        outs.append(outf)

        # get the modification time for the file (needed by NG)
        modt.append(SiteMover.getModTime(workdir, outf))

        # log a long listing of the file; a failing listing is only logged
        tolog("Output file(s):")
        try:
            _ec, listing = commands.getstatusoutput("ls -l %s/%s" % (workdir, outf))
        except Exception as e:
            tolog(str(e))
        else:
            tolog(listing)

    # NOTE(review): no return statement is visible for this function in the reviewed
    # source; ec, pilotErrorDiag, outs and modt are presumably returned further down - confirm
def updateSURLDictionary(self, guid, surl, directory, jobId): # OLD functionality: FIX ME LATER: avoid using intermediate buffer
    """
    Add the guid and surl to the surl dictionary.

    All arguments are forwarded unchanged to SiteMover.updateSURLDictionary()
    and its result is returned to the caller.
    """

    # temporary quick workaround: TO BE properly implemented later
    # the data should be passed directly instead of using intermediate JSON/CPICKLE file buffers

    from SiteMover import SiteMover

    status = SiteMover.updateSURLDictionary(guid, surl, directory, jobId)
    return status
def performLRCFileRegistration(self, fields, ub=None):
    """
    Actually register all files in the LRC.

    fields: indexable collection that is read as
            [0] pfns, [1] lfns, [2] guids, [3] fsizes, [4] checksums, [5] archivals;
            the checksum entry is split on "+" and its first element decides whether
            the checksums are sent as adler32s or as md5sums
    ub:     base URL of the LRC service; when it is None, "None" or "" no
            registration is attempted and ret/ec keep their initial values ('1', 0)

    The parameters are sent with urlopen() to <ub>/lrc/files. Any reply other
    than '1' is logged and mapped onto an error code in 'ec'. The server response
    object is always closed once it has been read.

    NOTE(review): 'error' and 'CMD_CHECKSUM' are not defined inside this function;
    presumably they are module-level names - confirm.
    """

    from urllib import urlencode, urlopen

    ret = '1'
    ec = 0

    # ub is 'None' outside the US
    if ub is not None and ub != "None" and ub != "":
        # find out if checksum or adler32 should be added
        from SiteMover import SiteMover

        _checksum = fields[4].split("+")[0] # first one, assume same type for the rest
        if _checksum:
            csumtype = SiteMover.getChecksumType(_checksum)
        else:
            csumtype = CMD_CHECKSUM # use default (md5sum)

        # the checksums are sent in exactly one of the two checksum fields, the other stays empty
        if csumtype == "adler32":
            md5sums, adler32s = '', fields[4]
        else:
            md5sums, adler32s = fields[4], ''
        params = urlencode({'pfns': fields[0], 'lfns': fields[1], 'guids': fields[2], 'fsizes': fields[3],
                            'md5sums': md5sums, 'adler32s': adler32s, 'archivals': fields[5]})

        try:
            url = ub + '/lrc/files'
            # avoid a double slash in case ub already ends with one
            if url.find('//lrc') > 0:
                url = url.replace('//lrc','/lrc')
            tolog("Will send params: %s" % str(params))
            tolog("Trying urlopen with: %s" % (url))
            f = urlopen(url, params)
        except Exception as e:
            tolog("!!WARNING!!4000!! Unexpected exception: %s" % str(e))
            ec = error.ERR_DDMREG
            ret = str(e)
        else:
            try:
                ret = f.read()
                if ret != '1':
                    ret = ret.replace('\n', ' ')
                    tolog('!!WARNING!!4000!! LRC registration error: %s' % str(ret))
                    tolog('!!WARNING!!4000!! LRC URL requested: %s' % f.geturl())
                    if ret == 'LFNnonunique':
                        ec = error.ERR_LRCREGDUP
                    elif ret.find("guid-metadata entry already exists") >= 0:
                        ec = error.ERR_GUIDSEXISTSINLRC
                    else:
                        ec = error.ERR_DDMREG
            finally:
                # release the connection also when reading the reply fails
                f.close()

    # NOTE(review): no return statement is visible for this function in the reviewed
    # source; ec and ret are presumably returned further down - confirm
def prepareOutFiles(outFiles, logFile, workdir, fullpath=False):
    """
    Verify and prepare the output files for transfer.

    outFiles: expected output files; empty entries and the string 'NULL' are ignored
    logFile:  not used by the part of the function visible here
    workdir:  directory in which the output files are expected (fullpath=False)
    fullpath: True means that the files in outFiles already have a full path, adding
              them to workdir is then not needed; each such entry is split into its
              directory (which replaces workdir) and its base name

    Each verified file name is collected in 'outs' and its modification time
    (from SiteMover.getModTime()) in 'modt'. The first missing file stops the
    scan and sets 'ec' to ERR_MISSINGOUTPUTFILE and 'pilotErrorDiag' to the
    corresponding message.
    """

    ec = 0
    pilotErrorDiag = ""
    outs = []
    modt = []

    from SiteMover import SiteMover

    for outf in outFiles:
        # skip empty strings and NULL entries
        if not outf or outf == 'NULL':
            continue

        path = os.path.join(workdir, outf)

        # with fullpath the entry itself is the file to look for, otherwise the joined path
        if fullpath:
            expected = outf
        else:
            expected = path

        if not os.path.isfile(expected):
            # expected output file is missing: give up on the remaining files
            pilotErrorDiag = "Expected output file %s does not exist" % (path)
            tolog("!!FAILED!!3000!! %s" % (pilotErrorDiag))
            error = PilotErrors()
            ec = error.ERR_MISSINGOUTPUTFILE
            break

        tolog("outf = %s" % (outf))
        if fullpath:
            # remove the full path here from outf
            workdir, outf = os.path.dirname(outf), os.path.basename(outf)

        outs.append(outf)

        # get the modification time for the file (needed by NG)
        modt.append(SiteMover.getModTime(workdir, outf))

        # log a long listing of the file; a failing listing is only logged
        tolog("Output file(s):")
        try:
            _ec, listing = commands.getstatusoutput("ls -l %s/%s" % (workdir, outf))
        except Exception as e:
            tolog(str(e))
        else:
            tolog(listing)

    # NOTE(review): no return statement is visible for this function in the reviewed
    # source; ec, pilotErrorDiag, outs and modt are presumably returned further down - confirm
def prepareOutFiles(outFiles, logFile, workdir, fullpath=False):
    """
    Verify and prepare the output files for transfer.

    outFiles: expected output files; empty entries and the string 'NULL' are ignored
    logFile:  not used by the part of the function visible here
    workdir:  directory in which the output files are expected (fullpath=False)
    fullpath: True means that the files in outFiles already have a full path, adding
              them to workdir is then not needed; each such entry is split into its
              directory (which replaces workdir) and its base name

    Each verified file name is collected in 'outs' and its modification time
    (from SiteMover.getModTime()) in 'modt'. The first missing file stops the
    scan and sets 'ec' to ERR_MISSINGOUTPUTFILE and 'pilotErrorDiag' to the
    corresponding message.
    """

    ec = 0
    pilotErrorDiag = ""
    outs = []
    modt = []

    from SiteMover import SiteMover

    for outf in outFiles:
        # skip empty strings and NULL entries
        if not outf or outf == 'NULL':
            continue

        # with fullpath the entry is checked as given, otherwise relative to workdir
        if fullpath:
            present = os.path.isfile(outf)
        else:
            present = os.path.isfile("%s/%s" % (workdir, outf))

        if not present:
            # expected output file is missing: give up on the remaining files
            pilotErrorDiag = "Expected output file %s does not exist" % (outf)
            tolog("!!FAILED!!3000!! %s" % (pilotErrorDiag))
            error = PilotErrors()
            ec = error.ERR_MISSINGOUTPUTFILE
            break

        tolog("outf = %s" % (outf))
        if fullpath:
            # remove the full path here from outf
            workdir, outf = os.path.dirname(outf), os.path.basename(outf)

        outs.append(outf)

        # get the modification time for the file (needed by NG)
        modt.append(SiteMover.getModTime(workdir, outf))

        # log a long listing of the file; a failing listing is only logged
        tolog("Output file(s):")
        try:
            _ec, listing = commands.getstatusoutput("ls -l %s/%s" % (workdir, outf))
        except Exception as e:
            tolog(str(e))
        else:
            tolog(listing)

    # NOTE(review): no return statement is visible for this function in the reviewed
    # source; ec, pilotErrorDiag, outs and modt are presumably returned further down - confirm
def updatePandaServer(self, job, site, workerNode, port, xmlstr=None, spaceReport=False, log=None, ra=0, jr=False, useCoPilot=False, stdout_tail="", stdout_path="", additionalMetadata=None):
    """
    Update the job status with the jobdispatcher web server.

    State is a tuple of (jobId, ["jobstatus", transExitCode, pilotErrorCode], timestamp)
    log = log extracts
    xmlstr is set in postJobTask for finished jobs (all files). Failed jobs will only
    send xml for log (created in this function)
    jr = job recovery mode
    stdout_tail = tail of the payload stdout; only its last 2048 characters are sent,
    and only when job.debug is 'true'
    stdout_path = path to the payload stdout, copied with xrdcp to the text indexer when
    'stdout_to_text_indexer' is found in the 'catchall' parameter

    For fake server updates the function returns (0, node) right after the node
    structure has been built. port, ra, useCoPilot and additionalMetadata are not
    used by the part of the function visible here.
    """

    tolog("Updating job status in updatePandaServer(): PandaId=%s, result=%s, time=%s" % (job.getState()))

    # set any holding job to failed for sites that do not use job recovery (e.g. sites with LSF, that immediately
    # removes any work directory after the LSF job finishes which of course makes job recovery impossible)
    if not self.__jobrec and job.result[0] == 'holding' and site.sitename != "CERNVM":
        job.result[0] = 'failed'
        tolog("This site does not support job recovery: HOLDING state reset to FAILED")

    # note: any changed job state above will be lost for fake server updates, does it matter?

    # get the node structure expected by the server
    node = self.getNodeStructure(job, site, workerNode, spaceReport=spaceReport, log=log)

    # skip the server update (e.g. on NG)
    if not self.__updateServer:
        tolog("(fake server update)")
        return 0, node

    tolog("xmlstr = %s" % (xmlstr))

    # get the xml
    node['xml'] = self.getXML(job, site.sitename, site.workdir, xmlstr=xmlstr, jr=jr)

    # stdout tail in case job.debug == 'true'
    if job.debug.lower() == "true" and stdout_tail != "":
        # protection for potentially large tails
        stdout_tail = stdout_tail[-2048:]
        node['stdout'] = stdout_tail
        tolog("Will send stdout tail:\n%s (length = %d)" % (stdout_tail, len(stdout_tail)))

        # also send the full stdout to a text indexer if required
        if stdout_path == "":
            tolog("stdout_path not set")
        elif "stdout_to_text_indexer" in readpar('catchall') and os.path.exists(stdout_path):
            tolog("Will send payload stdout to text indexer")

            # get the user name, which we will use to create a proper filename
            from SiteMover import SiteMover
            username = SiteMover().extractUsername(job.prodUserID)

            # any problem with the copy is only logged, it must not stop the server update
            try:
                # get setup path for xrdcp
                setup_path = getSiteInformation(job.experiment).getLocalROOTSetup()

                # remote location: <username>/<date dirs>/PanDA_payload_stdout-<jobId>.txt
                filename = "PanDA_payload_stdout-%s.txt" % (job.jobId)
                remotePath = os.path.join(username, self.getDateDirs(), filename)
                url = "root://faxbox.mwt2.org//group/logs/pilot/%s" % (remotePath)

                cmd = "%sxrdcp -f %s %s" % (setup_path, stdout_path, url)
                tolog("Executing command: %s" % (cmd))
                rc, rs = getstatusoutput(cmd)
                tolog("rc=%d, rs=%s" % (rc, rs))
            except Exception as e:
                tolog("!!WARNING!!3322!! Failed with text indexer: %s" % (e))

    # NOTE(review): the reviewed source ends here; the actual server update and the final
    # return value are presumably implemented further down - confirm
def getXML(self, job, sitename, workdir, xmlstr=None, jr=False):
    """
    Get the metadata xml.

    job:      job object; result[0], jobId, logFile and tarFileGuid are read, and
              experiment if the object has it (otherwise "unknown" is used)
    sitename: name of the site; 'holding' jobs only get xml on the site "CERNVM"
    workdir:  site work directory, used to locate the log file and skipped.xml and
              as fallback directory for the generated logfile.xml
    xmlstr:   not used by the part of the function visible here
    jr:       job recovery mode, passed on to PFCxml()

    For a failed job whose log file was copied, a logfile.xml metadata file is created
    (in a new XML4PandaJob_<jobId> directory below workdir if it can be made), updated
    with the size and checksum of the log file and read into node_xml. On Nordugrid
    (environment variable Nordugrid_pilot set) the metadata file is also copied to the
    pilot init dir.
    """

    node_xml = ""
    tolog("getXML called")

    # for backwards compatibility: fall back to "unknown" for job objects without an
    # experiment attribute (only a missing attribute is tolerated, nothing else is swallowed)
    experiment = getattr(job, "experiment", "unknown")

    # do not send xml for state 'holding' (will be sent by a later pilot during job recovery)
    if job.result[0] == 'holding' and sitename != "CERNVM":
        pass
    else:
        # only create and send log xml if the log was transferred
        if job.result[0] == 'failed' and isLogfileCopied(workdir):
            # generate the xml string for log file
            # at this time the job.workdir might have been removed (because this function can be called
            # after the removal of workdir is done), so we make a new dir
            xmldir = "%s/XML4PandaJob_%s" % (workdir, job.jobId)
            # group rw permission added as requested by LYON
            ec, rv = getstatusoutput("mkdir -m g+rw %s" % (xmldir))
            if ec != 0:
                tolog("!!WARNING!!1300!! Could not create xmldir from updatePandaServer: %d, %s (resetting to site workdir)" % (ec, rv))
                cmd = "ls -l %s" % (xmldir)
                out = getoutput(cmd)
                tolog("%s \n%s" % (cmd, out))
                xmldir = workdir

            # which checksum command should be used? query the site mover
            from SiteMoverFarm import getSiteMover
            sitemover = getSiteMover(readpar('copytool'), "")

            # on Nordugrid the log file is looked for in the pilot init dir
            if 'Nordugrid_pilot' in os.environ:
                fname = os.path.join(self.__pilot_initdir, job.logFile)
            else:
                fname = os.path.join(workdir, job.logFile)

            if os.path.exists(fname):
                fnamelog = "%s/logfile.xml" % (xmldir)
                guids_status = PFCxml(experiment, fnamelog, fntag="lfn", alog=job.logFile, alogguid=job.tarFileGuid, jr=jr)

                from SiteMover import SiteMover
                ec, pilotErrorDiag, _fsize, _checksum = SiteMover.getLocalFileInfo(fname, csumtype=sitemover.getChecksumCommand())
                if ec != 0:
                    tolog("!!WARNING!!1300!! getLocalFileInfo failed: (%d, %s, %s)" % (ec, str(_fsize), str(_checksum)))
                    tolog("!!WARNING!!1300!! Can not set XML (will not be sent to server)")
                    node_xml = ''
                else:
                    ec, _strXML = updateMetadata(fnamelog, _fsize, _checksum)
                    if ec == 0:
                        tolog("Added (%s, %s) to metadata file (%s)" % (_fsize, _checksum, fnamelog))
                    else:
                        tolog("!!WARNING!!1300!! Could not add (%s, %s) to metadata file (%s). XML will be incomplete: %d" %
                              (_fsize, _checksum, fnamelog, ec))

                    # add skipped file info
                    _skippedfname = os.path.join(workdir, "skipped.xml")
                    if os.path.exists(_skippedfname):
                        ec = addSkippedToPFC(fnamelog, _skippedfname)

                    try:
                        f = open(fnamelog)
                    except Exception as e:
                        tolog("!!WARNING!!1300!! Exception caught: Can not open the file %s: %s (will not send XML)" %
                              (fnamelog, str(e)))
                        node_xml = ''
                    else:
                        # read the whole metadata file; the handle is closed also if reading fails
                        try:
                            node_xml = f.read()
                        finally:
                            f.close()

                        # transfer logfile.xml to pilot init dir for Nordugrid
                        if 'Nordugrid_pilot' in os.environ:
                            try:
                                copy2(fnamelog, self.__pilot_initdir)
                            except Exception as e:
                                tolog("!!WARNING!!1600!! Exception caught: Could not copy NG log metadata file to init dir: %s" % str(e))
                            else:
                                tolog("Successfully copied NG log metadata file to pilot init dir: %s" % (self.__pilot_initdir))

    # NOTE(review): the reviewed source ends here without a return statement; node_xml is
    # presumably returned (after further branches) further down - confirm