Ejemplo n.º 1
0
def addToTotalSize(path, total_size):
    """
    Return total_size increased by the size of the local file 'path'.

    path:       path of the file whose size should be added
    total_size: running total (in bytes) of all in/output files so far

    A file that does not exist is skipped (a message is logged) and the total is
    returned unchanged. The size is obtained from SiteMover.getLocalFileSize();
    an empty string from that call leaves the total unchanged as well.
    """

    # Guard clause: nothing to add for a file that is not on disk
    if not os.path.exists(path):
        tolog("Skipping file %s in work dir size check since it is not present" % (path))
        return total_size

    from SiteMover import SiteMover

    # Ask a site mover instance for the size of the local file
    size_str = SiteMover().getLocalFileSize(path)
    tolog("Size of file %s: %s B" % (path, size_str))

    # Only a non-empty size string contributes to the total
    if size_str != "":
        return total_size + long(size_str)

    return total_size
Ejemplo n.º 2
0
def prepareOutFiles(outFiles, logFile, workdir):
    """
    Verify and prepare the output files for transfer.

    Walks through outFiles (empty and 'NULL' entries are ignored) and checks that
    each one exists as a regular file below workdir. Files that exist are collected
    in 'outs' and their modification times (from SiteMover.getModTime()) in 'modt'.
    The first missing file sets ec to PilotErrors().ERR_MISSINGOUTPUTFILE, fills
    pilotErrorDiag and stops the scan.

    NOTE(review): logFile is not referenced in the part of the function shown here,
    and no return statement is visible - presumably ec, pilotErrorDiag, outs and
    modt are handed back further down; confirm against the full source.
    """

    ec = 0
    pilotErrorDiag = ""
    outs = []
    modt = []

    from SiteMover import SiteMover
    for outf in outFiles:
        if outf and outf != 'NULL': # non-empty string and not NULL
            if not os.path.isfile("%s/%s" % (workdir, outf)): # expected output file is missing
                pilotErrorDiag = "Expected output file %s does not exist" % (outf)
                tolog("!!FAILED!!3000!! %s" % (pilotErrorDiag))
                error = PilotErrors()
                ec = error.ERR_MISSINGOUTPUTFILE
                # one missing file is enough to fail; the remaining files are not checked
                break
            else:
                outs.append(outf)
                # get the modification time for the file (needed by NG)
                modt.append(SiteMover.getModTime(workdir, outf))

                # NOTE(review): this header is logged once per file since it sits inside the loop
                tolog("Output file(s):")
                try:
                    # the exit status (_ec) is not checked; the listing is for the log only
                    _ec, _rs = commands.getstatusoutput("ls -l %s/%s" % (workdir, outf))
                except Exception, e:
                    tolog(str(e))
                else:
                    tolog(_rs)
Ejemplo n.º 3
0
    def updateSURLDictionary(self, guid, surl, directory, jobId): # OLD functionality: FIX ME LATER: avoid using intermediate buffer
        """
            Add the guid and surl to the surl dictionary.

            Thin wrapper: all arguments are forwarded unchanged to
            SiteMover.updateSURLDictionary() and its result is returned as is.
        """

        # Temporary quick workaround, to be implemented properly later:
        # the data should be passed on directly instead of going through
        # intermediate JSON/CPICKLE file buffers

        from SiteMover import SiteMover as _SiteMover

        outcome = _SiteMover.updateSURLDictionary(guid, surl, directory, jobId)
        return outcome
Ejemplo n.º 4
0
    def performLRCFileRegistration(self, fields, ub=None):
        """
        Actually register all files in the LRC.

        fields: sequence read by index: [0] pfns, [1] lfns, [2] guids, [3] fsizes,
                [4] checksums, [5] archivals. Only the part of fields[4] before the
                first '+' is inspected to decide between md5sum and adler32.
        ub:     base URL of the LRC service; nothing is sent when it is None, "None" or ""

        The parameters are url-encoded and sent with urlopen() to <ub>/lrc/files. A reply
        other than '1' is logged as a registration error and mapped to an error code in ec.

        NOTE(review): 'error' and CMD_CHECKSUM are not defined in the part of the method
        shown here - presumably module-level names; confirm. No return statement is
        visible either, so how ec and ret reach the caller cannot be told from here.
        """

        from urllib import urlencode, urlopen
        # '1' is the reply that is treated as success further down
        ret = '1'
        ec = 0
        if ub != "None" and ub != None and ub != "": # ub is 'None' outside the US
            # find out if checksum or adler32 should be added
            from SiteMover import SiteMover
            _checksum = fields[4].split("+")[0] # first one, assume same type for the rest
            if len(_checksum) > 0:
                csumtype = SiteMover.getChecksumType(_checksum)
            else:
                csumtype = CMD_CHECKSUM # use default (md5sum)

            # the checksum string goes into either 'adler32s' or 'md5sums'; the other key is sent empty
            if csumtype == "adler32":
                params = urlencode({'pfns': fields[0], 'lfns': fields[1], 'guids': fields[2], 'fsizes': fields[3],\
                                    'md5sums': '', 'adler32s': fields[4], 'archivals': fields[5]})        
            else:
                params = urlencode({'pfns': fields[0], 'lfns': fields[1], 'guids': fields[2], 'fsizes': fields[3],\
                                    'md5sums': fields[4], 'adler32s': '', 'archivals': fields[5]})        
            try:
                url = ub + '/lrc/files'
                # collapse the double slash that appears when ub already ends with '/'
                if url.find('//lrc') > 0:
                    url = url.replace('//lrc','/lrc')
                tolog("Will send params: %s" % str(params))
                tolog("Trying urlopen with: %s" % (url))
                # passing params as the second argument sends them as the request data
                f = urlopen(url, params)
            except Exception, e:
                tolog("!!WARNING!!4000!! Unexpected exception: %s" % str(e))
                ec = error.ERR_DDMREG
                ret = str(e)
            else:
                ret = f.read()
                if ret != '1':
                    # flatten the reply to a single line before logging and matching it
                    ret = ret.replace('\n', ' ')
                    tolog('!!WARNING!!4000!! LRC registration error: %s' % str(ret))
                    tolog('!!WARNING!!4000!! LRC URL requested: %s' % f.geturl())
                    if ret == 'LFNnonunique':
                        ec = error.ERR_LRCREGDUP
                    elif ret.find("guid-metadata entry already exists") >= 0:
                        ec = error.ERR_GUIDSEXISTSINLRC
                    else:
                        ec = error.ERR_DDMREG
Ejemplo n.º 5
0
def prepareOutFiles(outFiles, logFile, workdir, fullpath=False):
    """
    Verify and prepare the output files for transfer.

    outFiles: output file names; empty and 'NULL' entries are ignored
    logFile:  not referenced in the part of the function shown here
    workdir:  directory the file names are relative to when fullpath is False
    fullpath: True means that each entry in outFiles already carries its full path

    Files that exist are collected in 'outs' (base name only when fullpath is True)
    and their modification times (from SiteMover.getModTime()) in 'modt'. The first
    missing file sets ec to PilotErrors().ERR_MISSINGOUTPUTFILE, fills pilotErrorDiag
    and stops the scan.

    NOTE(review): no return statement is visible in this excerpt - presumably ec,
    pilotErrorDiag, outs and modt are handed back further down; confirm.
    """

    # fullpath = True means that the file in outFiles already has a full path, adding it to workdir is then not needed
    ec = 0
    pilotErrorDiag = ""
    outs = []
    modt = []

    from SiteMover import SiteMover
    for outf in outFiles:
        if outf and outf != 'NULL': # non-empty string and not NULL
            path = os.path.join(workdir, outf)
            # the file is looked up below workdir, or as given when it already has a full path
            # NOTE(review): with fullpath=True the check uses outf while the message reports path
            if (not os.path.isfile(path) and not fullpath) or (not os.path.isfile(outf) and fullpath):
                pilotErrorDiag = "Expected output file %s does not exist" % (path)
                tolog("!!FAILED!!3000!! %s" % (pilotErrorDiag))
                error = PilotErrors()
                ec = error.ERR_MISSINGOUTPUTFILE
                # one missing file is enough to fail; the remaining files are not checked
                break
            else:
                tolog("outf = %s" % (outf))
                if fullpath:
                    # remove the full path here from outf
                    # NOTE(review): this rebinds the workdir argument, so later iterations (and any
                    # code after the loop) see the directory of the most recent file
                    workdir = os.path.dirname(outf)
                    outf = os.path.basename(outf)

                outs.append(outf)

                # get the modification time for the file (needed by NG)
                modt.append(SiteMover.getModTime(workdir, outf))

                # NOTE(review): this header is logged once per file since it sits inside the loop
                tolog("Output file(s):")
                try:
                    # the exit status (_ec) is not checked; the listing is for the log only
                    _ec, _rs = commands.getstatusoutput("ls -l %s/%s" % (workdir, outf))
                except Exception, e:
                    tolog(str(e))
                else:
                    tolog(_rs)
def prepareOutFiles(outFiles, logFile, workdir, fullpath=False):
    """
    Verify and prepare the output files for transfer.

    outFiles: output file names; empty and 'NULL' entries are ignored
    logFile:  not referenced in the part of the function shown here
    workdir:  directory the file names are relative to when fullpath is False
    fullpath: True means that each entry in outFiles already carries its full path

    Files that exist are collected in 'outs' (base name only when fullpath is True)
    and their modification times (from SiteMover.getModTime()) in 'modt'. The first
    missing file sets ec to PilotErrors().ERR_MISSINGOUTPUTFILE, fills pilotErrorDiag
    and stops the scan.

    NOTE(review): a function with this name is already defined directly above; within
    one module this later definition is the one that stays bound to the name. No return
    statement is visible in this excerpt - presumably ec, pilotErrorDiag, outs and modt
    are handed back further down; confirm.
    """

    # fullpath = True means that the file in outFiles already has a full path, adding it to workdir is then not needed
    ec = 0
    pilotErrorDiag = ""
    outs = []
    modt = []

    from SiteMover import SiteMover
    for outf in outFiles:
        if outf and outf != 'NULL': # non-empty string and not NULL
            # the file is looked up below workdir, or as given when it already has a full path
            if (not os.path.isfile("%s/%s" % (workdir, outf)) and not fullpath) or (not os.path.isfile(outf) and fullpath): # expected output file is missing
                pilotErrorDiag = "Expected output file %s does not exist" % (outf)
                tolog("!!FAILED!!3000!! %s" % (pilotErrorDiag))
                error = PilotErrors()
                ec = error.ERR_MISSINGOUTPUTFILE
                # one missing file is enough to fail; the remaining files are not checked
                break
            else:
                tolog("outf = %s" % (outf))
                if fullpath:
                    # remove the full path here from outf
                    # NOTE(review): this rebinds the workdir argument, so later iterations (and any
                    # code after the loop) see the directory of the most recent file
                    workdir = os.path.dirname(outf)
                    outf = os.path.basename(outf)

                outs.append(outf)

                # get the modification time for the file (needed by NG)
                modt.append(SiteMover.getModTime(workdir, outf))

                # NOTE(review): this header is logged once per file since it sits inside the loop
                tolog("Output file(s):")
                try:
                    # the exit status (_ec) is not checked; the listing is for the log only
                    _ec, _rs = commands.getstatusoutput("ls -l %s/%s" % (workdir, outf))
                except Exception, e:
                    tolog(str(e))
                else:
                    tolog(_rs)
Ejemplo n.º 7
0
    def updatePandaServer(self, job, site, workerNode, port, xmlstr=None, spaceReport=False, log=None, ra=0, jr=False, useCoPilot=False, stdout_tail="", stdout_path="", additionalMetadata=None):
        """
        Update the job status with the jobdispatcher web server.

        State is a tuple of (jobId, ["jobstatus", transExitCode, pilotErrorCode], timestamp)
        log = log extracts
        xmlstr is set in postJobTask for finished jobs (all files). Failed jobs will only send xml for log (created in getXML())
        jr = job recovery mode
        stdout_tail = tail of the payload stdout; its last 2048 characters are added to the node when job.debug is 'true'
        stdout_path = path to the payload stdout file; it is copied with xrdcp to the text indexer when
                      'stdout_to_text_indexer' is part of the 'catchall' parameter and the file exists

        Returns (0, node) right away when server updates are switched off (fake server update).

        NOTE(review): port, ra, useCoPilot and additionalMetadata are not referenced in the part of the
        method shown here; the actual server call and the final return are not visible either.
        """
    
        # job.getState() has to supply the three values for the format string
        tolog("Updating job status in updatePandaServer(): PandaId=%s, result=%s, time=%s" % (job.getState()))

        # set any holding job to failed for sites that do not use job recovery (e.g. sites with LSF, that immediately
        # removes any work directory after the LSF job finishes which of course makes job recovery impossible)
        if not self.__jobrec:
            if job.result[0] == 'holding' and site.sitename != "CERNVM":
                job.result[0] = 'failed'
                tolog("This site does not support job recovery: HOLDING state reset to FAILED")

        # note: any changed job state above will be lost for fake server updates, does it matter?

        # get the node structure expected by the server
        node = self.getNodeStructure(job, site, workerNode, spaceReport=spaceReport, log=log)

        # skip the server update (e.g. on NG)
        if not self.__updateServer:
            tolog("(fake server update)")
            return 0, node

        tolog("xmlstr = %s" % (xmlstr))

        # get the xml
        node['xml'] = self.getXML(job, site.sitename, site.workdir, xmlstr=xmlstr, jr=jr)

        # stdout tail in case job.debug == 'true'
        if job.debug.lower() == "true" and stdout_tail != "":
            # protection for potentially large tails
            stdout_tail = stdout_tail[-2048:]
            node['stdout'] = stdout_tail
            tolog("Will send stdout tail:\n%s (length = %d)" % (stdout_tail, len(stdout_tail)))

            # also send the full stdout to a text indexer if required
            if stdout_path != "":
                if "stdout_to_text_indexer" in readpar('catchall') and os.path.exists(stdout_path):
                    tolog("Will send payload stdout to text indexer")

                    # get the user name, which we will use to create a proper filename
                    from SiteMover import SiteMover
                    s = SiteMover()
                    username = s.extractUsername(job.prodUserID)

                    # get setup path for xrdcp
                    # (best effort: any failure in this block is logged as a warning and otherwise ignored)
                    try:
                        si = getSiteInformation(job.experiment)
                        setup_path = si.getLocalROOTSetup()

                        # remote location: <username>/<date dirs>/PanDA_payload_stdout-<jobId>.txt
                        filename = "PanDA_payload_stdout-%s.txt" % (job.jobId)
                        dateDirs = self.getDateDirs()
                        remotePath = os.path.join(os.path.join(username, dateDirs), filename)
                        url = "root://faxbox.mwt2.org//group/logs/pilot/%s" % (remotePath)
                        # setup_path is prepended directly to the xrdcp command
                        cmd = "%sxrdcp -f %s %s" % (setup_path, stdout_path, url)
                        tolog("Executing command: %s" % (cmd))
                        rc, rs = getstatusoutput(cmd)
                        tolog("rc=%d, rs=%s" % (rc, rs))
                    except Exception, e:
                        tolog("!!WARNING!!3322!! Failed with text indexer: %s" % (e))
            else:
                # belongs to the stdout_path check: debug mode is on but no stdout path was given
                tolog("stdout_path not set")
Ejemplo n.º 8
0
    def getXML(self, job, sitename, workdir, xmlstr=None, jr=False):
        """ Get the metadata xml """

        node_xml = ""
        tolog("getXML called")

        # for backwards compatibility
        try:
            experiment = job.experiment
        except:
            experiment = "unknown"

        # do not send xml for state 'holding' (will be sent by a later pilot during job recovery)
        if job.result[0] == 'holding' and sitename != "CERNVM":
            pass
        else:
            # only create and send log xml if the log was transferred
            if job.result[0] == 'failed' and isLogfileCopied(workdir):
                # generate the xml string for log file
                # at this time the job.workdir might have been removed (because this function can be called
                # after the removal of workdir is done), so we make a new dir
                xmldir = "%s/XML4PandaJob_%s" % (workdir, job.jobId)
                # group rw permission added as requested by LYON
                ec, rv = getstatusoutput("mkdir -m g+rw %s" % (xmldir))
                if ec != 0:
                    tolog("!!WARNING!!1300!! Could not create xmldir from updatePandaServer: %d, %s (resetting to site workdir)" % (ec, rv))
                    cmd = "ls -l %s" % (xmldir)
                    out = getoutput(cmd)
                    tolog("%s \n%s" % (cmd, out))
                    xmldir = workdir

                # which checksum command should be used? query the site mover
                from SiteMoverFarm import getSiteMover
                sitemover = getSiteMover(readpar('copytool'), "")

                if os.environ.has_key('Nordugrid_pilot'):
                    fname = os.path.join(self.__pilot_initdir, job.logFile)
                else:
                    fname = os.path.join(workdir, job.logFile)
                if os.path.exists(fname):
                    fnamelog = "%s/logfile.xml" % (xmldir)
                    guids_status = PFCxml(experiment, fnamelog, fntag="lfn", alog=job.logFile, alogguid=job.tarFileGuid, jr=jr)
                    from SiteMover import SiteMover
                    ec, pilotErrorDiag, _fsize, _checksum = SiteMover.getLocalFileInfo(fname, csumtype=sitemover.getChecksumCommand())
                    if ec != 0:
                        tolog("!!WARNING!!1300!! getLocalFileInfo failed: (%d, %s, %s)" % (ec, str(_fsize), str(_checksum)))
                        tolog("!!WARNING!!1300!! Can not set XML (will not be sent to server)")
                        node_xml = ''
                    else:
                        ec, _strXML = updateMetadata(fnamelog, _fsize, _checksum)
                        if ec == 0:
                            tolog("Added (%s, %s) to metadata file (%s)" % (_fsize, _checksum, fnamelog))
                        else:
                            tolog("!!WARNING!!1300!! Could not add (%s, %s) to metadata file (%s). XML will be incomplete: %d" %\
                                  (_fsize, _checksum, fnamelog, ec))

                        # add skipped file info
                        _skippedfname = os.path.join(workdir, "skipped.xml")
                        if os.path.exists(_skippedfname):
                            ec = addSkippedToPFC(fnamelog, _skippedfname)

                        try:
                            f = open(fnamelog)
                        except Exception,e:
                            tolog("!!WARNING!!1300!! Exception caught: Can not open the file %s: %s (will not send XML)" %\
                                  (fnamelog, str(e)))
                            node_xml = ''
                        else:
                            node_xml = ''
                            for line in f:
                                node_xml += line
                            f.close()

                            # transfer logfile.xml to pilot init dir for Nordugrid
                            if os.environ.has_key('Nordugrid_pilot'):
                                try:
                                    copy2(fnamelog, self.__pilot_initdir)
                                except Exception, e:
                                    tolog("!!WARNING!!1600!! Exception caught: Could not copy NG log metadata file to init dir: %s" % str(e))
                                else:
                                    tolog("Successfully copied NG log metadata file to pilot init dir: %s" % (self.__pilot_initdir))
Ejemplo n.º 9
0
    def updatePandaServer(self, job, site, workerNode, port, xmlstr=None, spaceReport=False, log=None, ra=0, jr=False, useCoPilot=False, stdout_tail="", stdout_path="", additionalMetadata=None):
        """
        Update the job status with the jobdispatcher web server.

        State is a tuple of (jobId, ["jobstatus", transExitCode, pilotErrorCode], timestamp)
        log = log extracts
        xmlstr is set in postJobTask for finished jobs (all files). Failed jobs will only send xml for log (created in getXML())
        jr = job recovery mode
        stdout_tail = tail of the payload stdout; its last 2048 characters are added to the node when job.debug is 'true'
        stdout_path = path to the payload stdout file; it is copied with xrdcp to the text indexer when
                      'stdout_to_text_indexer' is part of the 'catchall' parameter and the file exists

        Returns (0, node) right away when server updates are switched off (fake server update).

        NOTE(review): port, ra, useCoPilot and additionalMetadata are not referenced in the part of the
        method shown here; the actual server call and the final return are not visible either.
        """
    
        # job.getState() has to supply the three values for the format string
        tolog("Updating job status in updatePandaServer(): PandaId=%s, result=%s, time=%s" % (job.getState()))

        # set any holding job to failed for sites that do not use job recovery (e.g. sites with LSF, that immediately
        # removes any work directory after the LSF job finishes which of course makes job recovery impossible)
        if not self.__jobrec:
            if job.result[0] == 'holding' and site.sitename != "CERNVM":
                job.result[0] = 'failed'
                tolog("This site does not support job recovery: HOLDING state reset to FAILED")

        # note: any changed job state above will be lost for fake server updates, does it matter?

        # get the node structure expected by the server
        node = self.getNodeStructure(job, site, workerNode, spaceReport=spaceReport, log=log)

        # skip the server update (e.g. on NG)
        if not self.__updateServer:
            tolog("(fake server update)")
            return 0, node

        tolog("xmlstr = %s" % (xmlstr))

        # get the xml
        node['xml'] = self.getXML(job, site.sitename, site.workdir, xmlstr=xmlstr, jr=jr)

        # stdout tail in case job.debug == 'true'
        if job.debug.lower() == "true" and stdout_tail != "":
            # protection for potentially large tails
            stdout_tail = stdout_tail[-2048:]
            node['stdout'] = stdout_tail
            tolog("Will send stdout tail:\n%s (length = %d)" % (stdout_tail, len(stdout_tail)))

            # also send the full stdout to a text indexer if required
            if stdout_path != "":
                if "stdout_to_text_indexer" in readpar('catchall') and os.path.exists(stdout_path):
                    tolog("Will send payload stdout to text indexer")

                    # get the user name, which we will use to create a proper filename
                    from SiteMover import SiteMover
                    s = SiteMover()
                    username = s.extractUsername(job.prodUserID)

                    # get setup path for xrdcp
                    # (best effort: any failure in this block is logged as a warning and otherwise ignored)
                    try:
                        si = getSiteInformation(job.experiment)
                        setup_path = si.getLocalROOTSetup()

                        # remote location: <username>/<date dirs>/PanDA_payload_stdout-<jobId>.txt
                        filename = "PanDA_payload_stdout-%s.txt" % (job.jobId)
                        dateDirs = self.getDateDirs()
                        remotePath = os.path.join(os.path.join(username, dateDirs), filename)
                        url = "root://faxbox.mwt2.org//group/logs/pilot/%s" % (remotePath)
                        # setup_path is prepended directly to the xrdcp command
                        cmd = "%sxrdcp -f %s %s" % (setup_path, stdout_path, url)
                        tolog("Executing command: %s" % (cmd))
                        rc, rs = getstatusoutput(cmd)
                        tolog("rc=%d, rs=%s" % (rc, rs))
                    except Exception, e:
                        tolog("!!WARNING!!3322!! Failed with text indexer: %s" % (e))
            else:
                # belongs to the stdout_path check: debug mode is on but no stdout path was given
                tolog("stdout_path not set")
Ejemplo n.º 10
0
    def getXML(self, job, sitename, workdir, xmlstr=None, jr=False):
        """ Get the metadata xml """

        node_xml = ""
        tolog("getXML called")

        # for backwards compatibility
        try:
            experiment = job.experiment
        except:
            experiment = "unknown"

        # do not send xml for state 'holding' (will be sent by a later pilot during job recovery)
        if job.result[0] == 'holding' and sitename != "CERNVM":
            pass
        else:
            # only create and send log xml if the log was transferred
            if job.result[0] == 'failed' and isLogfileCopied(workdir):
                # generate the xml string for log file
                # at this time the job.workdir might have been removed (because this function can be called
                # after the removal of workdir is done), so we make a new dir
                xmldir = "%s/XML4PandaJob_%s" % (workdir, job.jobId)
                # group rw permission added as requested by LYON
                ec, rv = getstatusoutput("mkdir -m g+rw %s" % (xmldir))
                if ec != 0:
                    tolog("!!WARNING!!1300!! Could not create xmldir from updatePandaServer: %d, %s (resetting to site workdir)" % (ec, rv))
                    cmd = "ls -l %s" % (xmldir)
                    out = getoutput(cmd)
                    tolog("%s \n%s" % (cmd, out))
                    xmldir = workdir

                # which checksum command should be used? query the site mover
                from SiteMoverFarm import getSiteMover
                sitemover = getSiteMover(readpar('copytool'), "")

                if os.environ.has_key('Nordugrid_pilot'):
                    fname = os.path.join(self.__pilot_initdir, job.logFile)
                else:
                    fname = os.path.join(workdir, job.logFile)
                if os.path.exists(fname):
                    fnamelog = "%s/logfile.xml" % (xmldir)
                    guids_status = PFCxml(experiment, fnamelog, fntag="lfn", alog=job.logFile, alogguid=job.tarFileGuid, jr=jr)
                    from SiteMover import SiteMover
                    ec, pilotErrorDiag, _fsize, _checksum = SiteMover.getLocalFileInfo(fname, csumtype=sitemover.getChecksumCommand())
                    if ec != 0:
                        tolog("!!WARNING!!1300!! getLocalFileInfo failed: (%d, %s, %s)" % (ec, str(_fsize), str(_checksum)))
                        tolog("!!WARNING!!1300!! Can not set XML (will not be sent to server)")
                        node_xml = ''
                    else:
                        ec, _strXML = updateMetadata(fnamelog, _fsize, _checksum)
                        if ec == 0:
                            tolog("Added (%s, %s) to metadata file (%s)" % (_fsize, _checksum, fnamelog))
                        else:
                            tolog("!!WARNING!!1300!! Could not add (%s, %s) to metadata file (%s). XML will be incomplete: %d" %\
                                  (_fsize, _checksum, fnamelog, ec))

                        # add skipped file info
                        _skippedfname = os.path.join(workdir, "skipped.xml")
                        if os.path.exists(_skippedfname):
                            ec = addSkippedToPFC(fnamelog, _skippedfname)

                        try:
                            f = open(fnamelog)
                        except Exception,e:
                            tolog("!!WARNING!!1300!! Exception caught: Can not open the file %s: %s (will not send XML)" %\
                                  (fnamelog, str(e)))
                            node_xml = ''
                        else:
                            node_xml = ''
                            for line in f:
                                node_xml += line
                            f.close()

                            # transfer logfile.xml to pilot init dir for Nordugrid
                            if os.environ.has_key('Nordugrid_pilot'):
                                try:
                                    copy2(fnamelog, self.__pilot_initdir)
                                except Exception, e:
                                    tolog("!!WARNING!!1600!! Exception caught: Could not copy NG log metadata file to init dir: %s" % str(e))
                                else:
                                    tolog("Successfully copied NG log metadata file to pilot init dir: %s" % (self.__pilot_initdir))