예제 #1
0
def run_command(data, config, env, monalisa):
    """Run the configured executable and record its outcome in `data`.

    When ``'cmsRun'`` is found in ``config['executable']``, the parameter
    set named by ``config['pset']`` is copied to a ``*_mod.py`` file, passed
    to `edit_process_source`, and run with a job report written to
    ``report.xml``.  Otherwise a string executable is split with `shlex`,
    a first element naming an existing file is joined onto the current
    working directory, and, if ``config['append inputs to args']`` is set,
    the entries of ``config['mask']['files']`` are appended as arguments.

    After the process returns, its exit code is stored in
    ``data['exe_exit_code']`` and ``data['task_exit_code']``, an update is
    sent through `apmonSend`, and the file information in `data` is filled
    in (from ``report.xml`` for cmsRun, with placeholder values otherwise).

    Args:
        data: dict that is updated in place with the results.
        config: dict with the task configuration; keys read here include
            'executable', 'arguments', 'pset', 'mask', 'monitoring',
            'cores', 'file map' and 'output files'.
        env: environment handed to `run_subprocess`.
        monalisa: passed through as the last argument of `apmonSend`.

    Raises:
        subprocess.CalledProcessError: if the executable returns a
            non-zero exit code.
    """
    cmd = config['executable']
    args = config['arguments']
    if 'cmsRun' in cmd:
        pset = config['pset']
        pset_mod = pset.replace(".py", "_mod.py")
        shutil.copy2(pset, pset_mod)

        edit_process_source(pset_mod, config)

        cmd = [cmd, '-j', 'report.xml', pset_mod]
        cmd.extend([str(arg) for arg in args])
    else:
        # NOTE(review): this snapshot is taken before the child process
        # runs, yet it is the value stored in data['cpu_time'] below --
        # confirm whether it should be sampled after the process instead.
        usage = resource.getrusage(resource.RUSAGE_CHILDREN)
        if isinstance(cmd, basestring):
            cmd = shlex.split(cmd)
        if os.path.isfile(cmd[0]):
            cmd[0] = os.path.join(os.getcwd(), cmd[0])
        cmd.extend([str(arg) for arg in args])

        if config.get('append inputs to args', False):
            cmd.extend([str(f) for f in config['mask']['files']])

    p = run_subprocess(cmd, env=env)
    logger.info("executable returned with exit code {0}.".format(p.returncode))
    data['exe_exit_code'] = p.returncode
    data['task_exit_code'] = data['exe_exit_code']

    # Dashboard does not like Unicode, just ASCII encoding
    monitorid = str(config['monitoring']['monitorid'])
    taskid = str(config['monitoring']['taskid'])
    apmonSend(taskid, monitorid, {
        'ExeEnd': config['executable'],
        'NCores': config.get('cores', 1)
    }, logging.getLogger('mona'), monalisa)

    if 'cmsRun' in config['executable']:
        if p.returncode == 0:
            parse_fwk_report(data, config, 'report.xml')
            calculate_alder32(data)
        else:
            parse_fwk_report(data, config, 'report.xml', exitcode=p.returncode)
    else:
        # No framework report is available for a generic executable, so the
        # per-file bookkeeping is filled with empty placeholder entries.
        data['files']['info'] = dict(
            (f, [0, []]) for f in config['file map'].values())
        data['files']['output_info'] = dict((f, {
            'runs': {},
            'events': 0,
            'adler32': '0'
        }) for f, rf in config['output files'])
        data['cpu_time'] = usage.ru_stime

    if p.returncode != 0:
        # CalledProcessError requires the exit status and the command;
        # raising the bare class fails with a TypeError instead.
        raise subprocess.CalledProcessError(p.returncode, cmd)
예제 #2
0
    def handleJobStatusChange(self, jobs, statusValue, statusMessage):
        """
        _handleJobStatusChange_

        Publish a status change to the dashboard for each job in jobs
        (used for submitted, completed or killed jobs).

        jobs is a list of dictionaries; the keys read here are
        (* denotes an optional key):
            workflow -> appended to self.taskPrefix to form the task id
            name -> combined with retry_count to form the job id
            retry_count -> retry count of the job
            *location -> reported as StatusDestination,
                         'NotAvailable' if absent
            *plugin -> reported as scheduler with its last six
                       characters removed
            *fwjr -> when present, the job is also passed to handleSteps
        statusValue and statusMessage are reported as StatusValue and
        StatusValueReason for every job.

        A non-zero result from apmonSend is logged and otherwise ignored.
        """
        logging.info("Handling %d jobs" % len(jobs))
        logging.debug("Handling jobs: %s" % jobs)

        for job in jobs:
            logging.debug("Sending info for job %s" % str(job))

            package = {
                'MessageType': 'JobStatus',
                'jobId': '%s_%i' % (job['name'], job['retry_count']),
                'taskId': self.taskPrefix + job['workflow'],
                'StatusValue': statusValue,
                'StatusValueReason': statusMessage,
                'StatusEnterTime': time.strftime(self.tsFormat, time.gmtime()),
                'StatusDestination': job.get('location', 'NotAvailable'),
            }

            pluginName = job.get('plugin', None)
            if pluginName:
                package['scheduler'] = pluginName[:-6]

            logging.debug("Sending: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                failure = "Error %i sending info for submitted job %s via UDP\n" \
                          % (result, job['name'])
                logging.error(failure + "Ignoring")
                logging.debug("Package sent: %s\n" % package)
                hostInfo = (self.destHost, self.destPort)
                logging.debug("Host info: host %s, port %s" % hostInfo)
            apmonFree()

            if 'fwjr' in job:
                self.handleSteps(job)

        return
예제 #3
0
    def testApmonSend(self):
        """
        _testApmonSend_

        Send one made-up payload through apmonSend and check that the
        call returns 0.
        """
        # The payload is fake data which is not monitored by dashboard.
        fakeParams = {"CPUUsage": 100, "MemUsage": 1}
        returnCode = apmonSend("TaskID", "Job1", fakeParams)
        self.assertEqual(0, returnCode)
예제 #4
0
def send_final_dashboard_update(data, config, monalisa):
    """Send the end-of-task summary to the dashboard.

    Reads CPU time, event count, exit codes, timing and the output storage
    element from `data`, logs the main values at debug level, and sends
    them (mostly as strings) through `apmonSend`, followed by `apmonFree`.
    ``CrabCpuPercentage`` is added only when the ratio of CPU time to total
    wrapper time can be computed; any failure there is ignored.

    Args:
        data: dict holding the task results.
        config: dict; ``config['monitoring']`` supplies 'monitorid' and
            'taskid'.
        monalisa: passed through as the last argument of `apmonSend`.
    """
    cputime = data['cpu_time']
    events_per_run = data['events_per_run']
    exe_exit_code = data['exe_exit_code']
    timing = data['task_timing']
    exe_time = timing['stage_out_end'] - timing['prologue_end']
    task_exit_code = data['task_exit_code']
    total_time = timing['stage_out_end'] - timing['wrapper_start']
    stageout_exit_code = data['stageout_exit_code']
    stageout_se = data['output_storage_element']

    for template, value in (
            ("Execution time {}", total_time),
            ("Exiting with code {}", task_exit_code),
            ("Reporting ExeExitCode {}", exe_exit_code),
            ("Reporting StageOutSE {}", stageout_se),
            ("Reporting StageOutExitCode {}", stageout_exit_code)):
        logger.debug(template.format(value))

    # The same event count is reported under three different keys.
    events_text = str(events_per_run)
    wrapper_text = str(total_time)
    parameters = {
        'ExeTime': str(exe_time),
        'ExeExitCode': str(exe_exit_code),
        'JobExitCode': str(task_exit_code),
        'JobExitReason': '',
        'StageOutSE': stageout_se,
        'StageOutExitStatus': str(stageout_exit_code),
        'StageOutExitStatusReason': 'Copy succedeed with srm-lcg utils',
        'CrabUserCpuTime': str(cputime),
        'CrabWrapperTime': wrapper_text,
        'WCCPU': wrapper_text,
        'NoEventsPerRun': events_text,
        'NbEvPerRun': events_text,
        'NEventsProcessed': events_text
    }
    try:
        # Best effort: skipped when the values are not numeric or the
        # total time is zero.
        parameters['CrabCpuPercentage'] = str(
            float(cputime) / float(total_time))
    except Exception:
        pass

    monitoring = config['monitoring']
    monitorid = str(monitoring['monitorid'])
    taskid = str(monitoring['taskid'])

    apmonSend(taskid, monitorid, parameters, logging.getLogger('mona'),
              monalisa)
    apmonFree()
예제 #5
0
    def publish(self, data, redundancy = 1):
        """
        _publish_

        Send data to the Dashboard through apmonSend, addressed with the
        task name, job name and server stored in this instance, then
        call apmonFree.

        redundancy is documented as the number of times to publish the
        information; this method does not use it.

        """
        logging.info("About to send UDP package to dashboard: %s" % data)
        logging.info("Using address %s" % self.server)

        sendArguments = {
            'taskid': self.taskName,
            'jobid': self.jobName,
            'params': data,
            'logr': logging,
            'apmonServer': self.server,
        }
        apmonSend(**sendArguments)
        apmonFree()
예제 #6
0
    def handleJobStatusChange(self, jobs, statusValue, statusMessage):
        """
        _handleJobStatusChange_

        For every job in jobs, send a 'JobStatus' message carrying
        statusValue and statusMessage to the dashboard.

        Each job is a dictionary; the keys read here are
        (* denotes an optional key):
            workflow -> appended to self.taskPrefix to form the task id
            name -> combined with retry_count to form the job id
            retry_count -> retry count of the job
            *location -> reported as StatusDestination,
                         'NotAvailable' if absent
            *plugin -> reported as scheduler with its last six
                       characters removed
            *fwjr -> when present, the job is also passed to handleSteps

        Send errors (non-zero apmonSend result) are logged and ignored.
        """
        logging.info("Handling %d jobs" % len(jobs))
        logging.debug("Handling jobs: %s" % jobs)

        for job in jobs:
            logging.debug("Sending info for job %s" % str(job))

            package = dict(MessageType='JobStatus')
            package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
            package['taskId'] = self.taskPrefix + job['workflow']
            package.update(StatusValue=statusValue)
            package.update(StatusValueReason=statusMessage)
            enteredAt = time.strftime(self.tsFormat, time.gmtime())
            package['StatusEnterTime'] = enteredAt
            package['StatusDestination'] = job.get('location', 'NotAvailable')

            plugin = job.get('plugin', None)
            if plugin:
                package['scheduler'] = plugin[:-6]

            logging.debug("Sending: %s" % str(package))
            sendArgs = dict(taskid=package['taskId'],
                            jobid=package['jobId'],
                            params=package,
                            logr=logging,
                            apmonServer=self.serverreport)
            result = apmonSend(**sendArgs)

            if result != 0:
                msg = "Error %i sending info for submitted job %s via UDP\n" \
                      % (result, job['name'])
                logging.error(msg + "Ignoring")
                logging.debug("Package sent: %s\n" % package)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
            apmonFree()

            if 'fwjr' in job:
                self.handleSteps(job)

        return
예제 #7
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Send the per-step information of the job's report (job['fwjr'])
        to the dashboard.

        Nothing is done when job['fwjr'] is None.  Steps without a
        'counter' attribute are skipped, as are steps for which no
        non-None value is collected.  Every remaining value is published
        under the key '<counter>_<key>'.  A non-zero apmonSend result is
        logged and otherwise ignored.  apmonFree is called once after
        all steps were processed.
        """
        report = job['fwjr']
        if report is None:
            return

        for stepName in report.listSteps():
            step = report.retrieveStep(stepName)
            if not hasattr(step, 'counter'):
                continue

            counter = step.counter

            collected = {}
            collected.update(self.getPerformanceInformation(step))
            collected.update(self.getEventInformation(stepName, report))

            # Prefix every key with the step counter and drop unset values;
            # None is tested by identity, not equality.
            package = dict(('%d_%s' % (counter, key), value)
                           for key, value in collected.items()
                           if value is not None)

            if not package:
                continue

            package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
            package['taskId'] = self.taskPrefix + job['workflow']
            package['%d_stepName' % counter] = stepName

            logging.debug("Sending step info: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg =  "Error %i sending info for completed job %s via UDP\n" \
                        % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                logging.debug("Package sent: %s\n" % package)
                logging.debug("Host info: host %s, port %s" \
                              % (self.destHost,
                                 self.destPort))
        apmonFree()

        return
예제 #8
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Publish the post-processing step information found in
        job['fwjr'] to the dashboard.

        Returns immediately when job['fwjr'] is None.  A step is skipped
        when it has no 'counter' attribute or when none of its collected
        values is set.  Values are sent under '<counter>_<key>' keys
        together with the job id, task id and step name.  Failures
        reported by apmonSend are logged and ignored.  apmonFree is
        called once after the loop over all steps.
        """
        fwjr = job['fwjr']
        if fwjr is None:
            return

        for stepName in fwjr.listSteps():
            step = fwjr.retrieveStep(stepName)
            if not hasattr(step, 'counter'):
                continue

            counter = step.counter

            stepInfo = {}
            stepInfo.update(self.getPerformanceInformation(step))
            stepInfo.update(self.getEventInformation(stepName, fwjr))

            # Keep only the values that are set (identity test against
            # None) and namespace each key with the step counter.
            package = dict(('%d_%s' % (counter, key), value)
                           for key, value in stepInfo.items()
                           if value is not None)

            if not package:
                continue

            package['jobId']    = '%s_%i' % (job['name'], job['retry_count'])
            package['taskId']   = self.taskPrefix + job['workflow']
            package['%d_stepName' % counter] = stepName

            logging.debug("Sending step info: %s" % str(package))
            result = apmonSend(taskid = package['taskId'],
                               jobid = package['jobId'], params = package,
                               logr = logging, apmonServer = self.serverreport)

            if result != 0:
                msg =  "Error %i sending info for completed job %s via UDP\n" \
                        % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                logging.debug("Package sent: %s\n" % package)
                logging.debug("Host info: host %s, port %s" \
                              % (self.destHost,
                                 self.destPort))
        apmonFree()

        return
예제 #9
0
    def testApmonSend(self):
        """
        _testApmonSend_

        Call apmonSend once with fake data and expect a return value
        of 0.
        """
        # We just send fake data which is not monitored by dashboard.
        payload = {"CPUUsage": 100, "MemUsage": 1}
        status = apmonSend("TaskID", "Job1", payload)
        self.assertEqual(0, status)
예제 #10
0
    def handleCreated(self, jobs):
        """
        _handleCreated_

        Publish a 'JobMeta' message to the dashboard for every created
        job in jobs.

        jobs is a list of dictionaries; the keys read here are
        (* denotes an optional key):
            workflow -> appended to self.taskPrefix to form the task id
            name -> combined with retry_count to form the job id
            retry_count -> retry count of the job
            taskType -> reported as TaskType
            jobType -> reported as JobType
            *nEventsToProc -> reported as NEventsToProcess,
                              'NotAvailable' if absent

        A non-zero result from apmonSend is logged and otherwise ignored.
        """
        logging.info ("Handling %d created jobs" % len(jobs))
        logging.debug ("Handling created jobs: %s" % jobs)

        for job in jobs:
            logging.debug("Sending info for job %s" % str(job))

            package = {
                'MessageType': 'JobMeta',
                'taskId': self.taskPrefix + job['workflow'],
                'jobId': '%s_%i' % (job['name'], job['retry_count']),
                'TaskType': job['taskType'],
                'JobType': job['jobType'],
                'NEventsToProcess': job.get('nEventsToProc', 'NotAvailable'),
            }

            logging.debug("Sending: %s" % str(package))
            result = apmonSend(taskid = package['taskId'],
                               jobid = package['jobId'],
                               params = package,
                               logr = logging,
                               apmonServer = self.serverreport)
            if result != 0:
                failure = "Error %i sending info for submitted job %s via UDP\n" \
                          % (result, job['name'])
                logging.error(failure + "Ignoring")
                logging.debug("Package sent: %s\n" % package)
                hostInfo = (self.destHost, self.destPort)
                logging.debug("Host info: host %s, port %s" % hostInfo)
            apmonFree()

        return
예제 #11
0
    def publish(self, data, redundancy=1):
        """
        _publish_

        Publish data to the Dashboard: log the payload and the target
        address, hand the payload to apmonSend together with this
        instance's task name, job name and server, then call apmonFree.

        redundancy is described as the number of times to publish the
        information, but it is not used here.

        """
        logging.info("About to send UDP package to dashboard: %s" % data)
        logging.info("Using address %s" % self.server)

        destination = dict(taskid=self.taskName, jobid=self.jobName)
        apmonSend(params=data,
                  logr=logging,
                  apmonServer=self.server,
                  **destination)
        apmonFree()
예제 #12
0
    def handleCreated(self, jobs):
        """
        _handleCreated_

        Send the meta information of each newly created job to the
        dashboard as a 'JobMeta' message.

        Every entry of jobs is a dictionary; the keys read here are
        (* denotes an optional key):
            workflow -> appended to self.taskPrefix to form the task id
            name -> combined with retry_count to form the job id
            retry_count -> retry count of the job
            taskType -> reported as TaskType
            jobType -> reported as JobType
            *nEventsToProc -> reported as NEventsToProcess,
                              'NotAvailable' if absent

        Send errors (non-zero apmonSend result) are logged and ignored.
        """
        logging.info("Handling %d created jobs" % len(jobs))
        logging.debug("Handling created jobs: %s" % jobs)

        for job in jobs:
            logging.debug("Sending info for job %s" % str(job))

            package = dict(MessageType='JobMeta')
            package['taskId'] = self.taskPrefix + job['workflow']
            package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
            package.update(TaskType=job['taskType'])
            package.update(JobType=job['jobType'])
            eventCount = job.get('nEventsToProc', 'NotAvailable')
            package['NEventsToProcess'] = eventCount

            logging.debug("Sending: %s" % str(package))
            sendArgs = dict(taskid=package['taskId'],
                            jobid=package['jobId'],
                            params=package,
                            logr=logging,
                            apmonServer=self.serverreport)
            result = apmonSend(**sendArgs)
            if result != 0:
                msg = "Error %i sending info for submitted job %s via UDP\n" \
                      % (result, job['name'])
                logging.error(msg + "Ignoring")
                logging.debug("Package sent: %s\n" % package)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
            apmonFree()

        return
예제 #13
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Send the performance figures of every step in job['performance']
        to the dashboard.

        For each step a package is built from the job id, task id, step
        name and the memory, storage and cpu values listed below; a
        value missing from its section is sent as None.  A non-zero
        apmonSend result is logged, raised as an alert through
        self.sendAlert, and otherwise ignored.  apmonFree is called once
        after all steps were processed.
        """
        # Section of the performance record -> metrics copied from it,
        # in the order they are added to the package.
        metricLayout = (
            ('memory', ('PeakValueRss', 'PeakValueVsize')),
            ('storage', ('writeTotalMB', 'readPercentageOps',
                         'readAveragekB', 'readTotalMB', 'readNumOps',
                         'readCachePercentageOps', 'readMBSec',
                         'readMaxMSec', 'readTotalSecs',
                         'writeTotalSecs')),
            ('cpu', ('TotalJobCPU', 'TotalEventCPU', 'AvgEventCPU',
                     'AvgEventTime', 'MinEventCPU', 'MaxEventTime',
                     'TotalJobTime', 'MinEventTime', 'MaxEventCPU')),
        )

        for stepName, performance in job['performance'].items():
            package = {
                'jobId': '%s_%i' % (job['name'], job['retryCount']),
                'taskId': 'wmagent_%s' % job['requestName'],
                'stepName': stepName,
            }
            for section, metricNames in metricLayout:
                sectionValues = performance[section]
                for metricName in metricNames:
                    package[metricName] = sectionValues.get(metricName, None)

            logging.debug("Sending performance info: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg = ("Error %i sending info for completed job %s via UDP\n"
                       % (result, job['name'])) + "Ignoring"
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                logging.debug("Package sent: %s\n" % package)
                reporter = self.config.DashboardReporter
                logging.debug("Host info: host %s, port %s, pass %s"
                              % (reporter.dashboardHost,
                                 reporter.dashboardPort,
                                 getattr(self.config.DashboardReporter,
                                         'dashboardPass', '')))
        apmonFree()

        return
예제 #14
0
    def handleJobStatusChange(self, jobs, statusValue, statusMessage):
        """
        _handleJobStatusChange_

        Send a 'JobStatus' message with statusValue and statusMessage to
        the dashboard for every job in jobs (submitted, completed or
        killed jobs).

        Each job is a dictionary; the keys read here are
        (* denotes an optional key):
            workflow -> appended to self.taskPrefix to form the task id
            name -> combined with retry_count to form the job id
            retry_count -> retry count of the job
            *location -> reported as StatusDestination,
                         "NotAvailable" if absent
            *fwjr -> when present, the job is also passed to handleSteps

        A non-zero result from apmonSend is logged and otherwise ignored.
        """
        logging.info("Handling jobs: %s" % jobs)

        for job in jobs:
            logging.info("Sending info for job %s" % str(job))

            package = {
                "MessageType": "JobStatus",
                "jobId": "%s_%i" % (job["name"], job["retry_count"]),
                "taskId": self.taskPrefix + job["workflow"],
                "StatusValue": statusValue,
                "StatusValueReason": statusMessage,
                "StatusEnterTime": time.strftime(self.tsFormat, time.gmtime()),
                "StatusDestination": job.get("location", "NotAvailable"),
            }

            logging.info("Sending: %s" % str(package))
            sendArgs = {
                "taskid": package["taskId"],
                "jobid": package["jobId"],
                "params": package,
                "logr": logging,
                "apmonServer": self.serverreport,
            }
            result = apmonSend(**sendArgs)

            if result != 0:
                failure = "Error %i sending info for submitted job %s via UDP\n" % (result, job["name"])
                logging.error(failure + "Ignoring")
                logging.debug("Package sent: %s\n" % package)
                hostInfo = (self.destHost, self.destPort)
                logging.debug("Host info: host %s, port %s" % hostInfo)
            apmonFree()

            if "fwjr" in job:
                self.handleSteps(job)

        return
예제 #15
0
def send_initial_dashboard_update(data, config, monalisa):
    """Send the start-of-task message to the dashboard.

    The sync CE is chosen as follows: unless the environment variable
    ``PARROT_ENABLED`` is ``"TRUE"``, the site name from
    `loadSiteLocalConfig` is used.  If that is skipped or fails, the first
    of ``GLIDEIN_Gatekeeper``, ``OSG_HOSTNAME`` and
    ``CONDORCE_COLLECTOR_HOST`` found in the environment is used.  Failing
    that, ``config['default ce']`` is used when the last two components of
    this host's FQDN match those of ``config['default host']``, and
    ``'Unknown'`` otherwise.

    Args:
        data: not used by this function.
        config: dict; keys read here are 'monitoring', 'executable',
            'cores', 'default host' and 'default ce'.
        monalisa: passed through as the last argument of `apmonSend`.
    """
    # Dashboard does not like Unicode, just ASCII encoding
    monitoring = config['monitoring']
    monitorid = str(monitoring['monitorid'])
    syncid = str(monitoring['syncid'])
    taskid = str(monitoring['taskid'])

    try:
        if os.environ.get("PARROT_ENABLED", "FALSE") == "TRUE":
            # Jump straight to the fallback lookup below.
            raise ValueError()
        sync_ce = loadSiteLocalConfig().siteName
    except Exception:
        candidates = ("GLIDEIN_Gatekeeper", "OSG_HOSTNAME",
                      "CONDORCE_COLLECTOR_HOST")
        available = [name for name in candidates if name in os.environ]
        if available:
            sync_ce = os.environ[available[0]]
        else:
            local_domain = socket.getfqdn().rsplit('.')[-2:]
            default_domain = config['default host'].rsplit('.')[-2:]
            if local_domain == default_domain:
                sync_ce = config['default ce']
            else:
                sync_ce = 'Unknown'

    logger.info("using sync CE {}".format(sync_ce))

    parameters = dict(
        ExeStart=str(config['executable']),
        NCores=config.get('cores', 1),
        SyncCE=sync_ce,
        SyncGridJobId=syncid,
        WNHostName=socket.getfqdn())

    apmonSend(taskid, monitorid, parameters, logging.getLogger('mona'),
              monalisa)
    apmonFree()
예제 #16
0
    def publish(self, redundancy = 1, data = None):
        """
        _publish_

        Publish information to the Dashboard through apmonSend, using
        the task name, job name and server stored in this instance, then
        call apmonFree.

        If data is given and non-empty it is published, otherwise the
        contents of this object are.  Entries whose value is None are
        left out.  The payload is assembled in a new dictionary, so
        neither data nor this object is modified.

        redundancy is described as the number of times to publish the
        information; it is currently not used.

        """
        source = data if data else self

        # Filter into a fresh dict: deleting keys while iterating over
        # items() fails on Python 3 and would alter the caller's data.
        snapshot = {}
        snapshot.update(source)
        toPublish = dict((key, value) for key, value in snapshot.items()
                         if value is not None)

        logging.debug("About to send UDP package to dashboard: %s" % toPublish)
        logging.debug("Using address %s" % self.server)
        apmonSend(taskid = self.taskName, jobid = self.jobName, params = toPublish,
                  logr = logging, apmonServer = self.server)
        apmonFree()

        return
예제 #17
0
    def handleSubmitted(self, jobs):
        """
        _handleSubmitted_

        Report each submitted job to the Dashboard via UDP.

        For every job, its task (job['requestName']) is first registered
        through self.addTask unless it is already in self.taskCache.  A
        'JobMeta' message with status 'submitted' is then sent.  The
        keys read from each job are requestName, name, retryCount,
        jobType and taskType, plus the optional user and timestamp (the
        current time is used when timestamp is absent).

        A non-zero apmonSend result is logged, raised as an alert
        through self.sendAlert, and otherwise ignored.
        """
        logging.info("Handling jobs to be submitted: %s" % jobs)

        for job in jobs:
            taskName = job['requestName']
            if taskName not in self.taskCache:
                self.addTask(name=taskName, user=job.get('user', None))
            logging.info("Sending info for task %s" % str(job))

            package = {
                'jobId': '%s_%i' % (job['name'], job['retryCount']),
                'taskId': 'wmagent_%s' % taskName,
                'GridJobID': 'NotAvailable',
                'retryCount': job['retryCount'],
                'MessageTS': time.time(),
                'MessageType': 'JobMeta',
                'JobType': job['jobType'],
                'TaskType': job['taskType'],
                'StatusValue': 'submitted',
                'scheduler': 'BossAir',
                'StatusEnterTime': job.get('timestamp', time.time()),
            }

            logging.info("Sending: %s" % str(package))
            sendArgs = dict(taskid=package['taskId'],
                            jobid=package['jobId'],
                            params=package,
                            logr=logging,
                            apmonServer=self.serverreport)
            result = apmonSend(**sendArgs)

            if result != 0:
                msg = ("Error %i sending info for submitted job %s via UDP\n"
                       % (result, job['name'])) + "Ignoring"
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                logging.debug("Package sent: %s\n" % package)
                reporter = self.config.DashboardReporter
                logging.debug("Host info: host %s, port %s, pass %s"
                              % (reporter.dashboardHost,
                                 reporter.dashboardPort,
                                 getattr(self.config.DashboardReporter,
                                         'dashboardPass', '')))
            apmonFree()
        return
예제 #18
0
    def addTask(self, task):
        """
        _addTask_

        Send a "TaskMeta" message describing task to the Dashboard.

        task is a dictionary; the keys read here are:
            name -> appended to self.taskPrefix to form the task name
            application -> reported as application
            nevtJob -> reported as nevtJob
            GridName -> reported as GridName
            scheduler -> reported as scheduler
            TaskType -> reported as TaskType
            datasetFull -> reported as datasetFull
            user -> reported as CMSUser
        The tool is always reported as "WMAgent" and JSToolVersion is
        taken from __version__.

        A non-zero result from apmonSend is logged and otherwise ignored.
        """
        taskName = task["name"]
        package = {
            "MessageType": "TaskMeta",
            "application": task["application"],
            "nevtJob": task["nevtJob"],
            "tool": "WMAgent",
            "JSToolVersion": __version__,
            "GridName": task["GridName"],
            "scheduler": task["scheduler"],
            "TaskType": task["TaskType"],
            "TaskName": self.taskPrefix + taskName,
            "JobName": "taskMeta",
            "datasetFull": task["datasetFull"],
            "CMSUser": task["user"],
        }

        logging.debug("Sending task info: %s" % str(package))
        sendArgs = {
            "taskid": package["TaskName"],
            "jobid": package["JobName"],
            "params": package,
            "logr": logging,
            "apmonServer": self.serverreport,
        }
        result = apmonSend(**sendArgs)

        if result != 0:
            failure = "Error %i sending info for new task %s via UDP\n" % (result, taskName)
            logging.error(failure + "Ignoring")
            logging.debug("Package sent: %s\n" % package)
            hostInfo = (self.destHost, self.destPort)
            logging.debug("Host info: host %s, port %s" % hostInfo)
        apmonFree()
예제 #19
0
    def addTask(self, task):
        """
        _addTask_

        Send a 'TaskMeta' message describing task to the Dashboard.

        task is a dictionary; the keys read here are:
            name -> appended to self.taskPrefix to form the task name
            application -> reported as application
            TaskType -> reported as TaskType
            datasetFull -> reported as datasetFull
            user -> reported as CMSUser
        The tool is always reported as 'WMAgent' and JSToolVersion is
        taken from __version__.

        A non-zero result from apmonSend is logged and otherwise ignored.
        """
        taskName = task['name']
        package = {
            'MessageType': 'TaskMeta',
            'application': task['application'],
            'tool': 'WMAgent',
            'JSToolVersion': __version__,
            'TaskType': task['TaskType'],
            'TaskName': self.taskPrefix + taskName,
            'JobName': 'taskMeta',
            'datasetFull': task['datasetFull'],
            'CMSUser': task['user'],
        }

        logging.info("Sending %s info" % taskName)
        logging.debug("Sending task info: %s" % str(package))
        sendArgs = dict(taskid=package['TaskName'],
                        jobid=package['JobName'],
                        params=package,
                        logr=logging,
                        apmonServer=self.serverreport)
        result = apmonSend(**sendArgs)

        if result != 0:
            failure = "Error %i sending info for new task %s via UDP\n" % (
                result, taskName)
            logging.error(failure + "Ignoring")
            logging.debug("Package sent: %s\n" % package)
            hostInfo = (self.destHost, self.destPort)
            logging.debug("Host info: host %s, port %s" % hostInfo)
        apmonFree()
예제 #20
0
    def addTask(self, task):
        """
        _addTask_

        Report the metadata of a task ('TaskMeta' message) to the Dashboard.

        Keys read from ``task``:
            name -> task name, prefixed with self.taskPrefix for 'TaskName'
            application -> value of 'application'
            TaskType -> value of 'TaskType'
            datasetFull -> value of 'datasetFull'
            user -> value of 'CMSUser'

        The send result is only checked for logging purposes: a non-zero
        value is logged as an error and ignored. apmonFree() always runs
        after the send.
        """
        taskName = task['name']
        package = dict(MessageType='TaskMeta',
                       application=task['application'],
                       tool='WMAgent',
                       JSToolVersion=__version__,
                       TaskType=task['TaskType'],
                       TaskName=self.taskPrefix + taskName,
                       JobName='taskMeta',
                       datasetFull=task['datasetFull'],
                       CMSUser=task['user'])

        logging.info("Sending %s info" % taskName)
        logging.debug("Sending task info: %s" % str(package))

        sendArgs = dict(taskid=package['TaskName'],
                        jobid=package['JobName'],
                        params=package,
                        logr=logging,
                        apmonServer=self.serverreport)
        sendStatus = apmonSend(**sendArgs)

        sendFailed = sendStatus != 0
        if sendFailed:
            logging.error(
                "Error %i sending info for new task %s via UDP\n"
                % (sendStatus, taskName) + "Ignoring")
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()
예제 #21
0
    def addTask(self, name, user):
        """
        _addTask_

        Send the 'TaskMeta' record of a workflow to the Dashboard.

        :param name: workflow name; sent as 'Workflow' and used to build the
                     'wmagent_<name>' taskId
        :param user: sent as 'CMSUser'

        When apmonSend returns 0 the name is appended to self.taskCache.
        Otherwise the failure is logged, an alert is raised through
        self.sendAlert and the error is ignored. apmonFree() is called in
        both cases.
        """
        package = {}
        package['MessageType'] = 'TaskMeta'
        package['MessageTS'] = time.time()
        package['taskId'] = 'wmagent_%s' % name
        package['jobId'] = 'taskMeta'
        package['JSTool'] = 'WMAgent'
        package['JSToolVersion'] = __version__
        package['CMSUser'] = user
        package['Workflow'] = name
        package['AgentName'] = self.agentName

        logging.info("Sending info for task %s" % str(name))

        logging.debug("Sending task info: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            msg = "Error %i sending info for new task %s via UDP\n" % (result, name)
            msg += "Ignoring"
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            logging.debug("Package sent: %s\n" % package)

            reporterConfig = self.config.DashboardReporter
            # Never write the Dashboard password itself to the log; only
            # record whether one is configured.
            if getattr(reporterConfig, 'dashboardPass', ''):
                passInfo = '<redacted>'
            else:
                passInfo = '<not set>'
            logging.debug("Host info: host %s, port %s, pass %s"
                          % (reporterConfig.dashboardHost,
                             reporterConfig.dashboardPort,
                             passInfo))
        else:
            self.taskCache.append(name)

        apmonFree()
예제 #22
0
    def handleCompleted(self, jobs):
        """
        _handleCompleted_

        Handle the completed jobs:
        Send them to Dashboard via UDP

        :param jobs: iterable of job mappings; each one is read for 'name',
                     'retryCount', 'requestName', 'finalState', 'timestamp'
                     and 'exitCode'

        For every job a 'JobStatus' package is sent, apmonFree() is called
        and the job is then handed to self.handleSteps. A failed send is
        logged, raised as an alert through self.sendAlert and otherwise
        ignored.
        """
        # Pass the argument lazily: "%s" % jobs raises TypeError when jobs
        # is a tuple (for example an empty one).
        logging.info("Handling jobs to be completed: %s", jobs)

        for job in jobs:
            package = {}
            package['jobId'] = '%s_%i' % (job['name'], job['retryCount'])
            package['taskId'] = 'wmagent_%s' % job['requestName']
            package['GridJobID'] = job['name']
            package['retryCount'] = job['retryCount']
            package['MessageTS'] = time.time()
            package['MessageType'] = 'JobStatus'
            package['StatusValue'] = job['finalState']
            package['StatusEnterTime'] = job['timestamp']
            package['JobExitCode'] = job['exitCode']

            logging.info("Sending completed info: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg = "Error %i sending info for completed job %s via UDP\n" % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                logging.debug("Package sent: %s\n" % package)

                reporterConfig = self.config.DashboardReporter
                # Never write the Dashboard password itself to the log; only
                # record whether one is configured.
                if getattr(reporterConfig, 'dashboardPass', ''):
                    passInfo = '<redacted>'
                else:
                    passInfo = '<not set>'
                logging.debug("Host info: host %s, port %s, pass %s"
                              % (reporterConfig.dashboardHost,
                                 reporterConfig.dashboardPort,
                                 passInfo))
            apmonFree()

            self.handleSteps(job=job)
        return
예제 #23
0
 def send(self, taskid, params):
     """
     Forward ``params`` to apmonSend.

     self._workflowid is passed as the first positional argument and
     ``taskid`` as the second, followed by ``params``, the "MonaLisa"
     logger and the module-level ``conf``. The result of apmonSend is
     discarded.
     """
     monaLogger = logging.getLogger("MonaLisa")
     apmonSend(self._workflowid, taskid, params, monaLogger, conf)
예제 #24
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Handle the post-processing step information: send the performance
        numbers of every step of a job to the Dashboard.

        :param job: mapping read for "fwjr", "name", "retry_count" and
                    "workflow"; job["fwjr"] is either None or an object
                    providing listSteps() and retrieveStep(stepName)

        Nothing happens when job["fwjr"] is None. Steps without a
        "performance" attribute, or whose performance has none of the
        memory/storage/cpu sections, are skipped. A failed send is logged
        and ignored. apmonFree() is called once after all steps.
        """
        report = job["fwjr"]
        if report is None:
            return

        # Metric names read from each performance section, in sending order
        metricsBySection = (
            ("memory", ("PeakValueRss", "PeakValuePss", "PeakValueVsize")),
            ("storage", ("writeTotalMB", "readPercentageOps", "readAveragekB",
                         "readTotalMB", "readNumOps",
                         "readCachePercentageOps", "readMBSec",
                         "readMaxMSec", "readTotalSecs", "writeTotalSecs")),
            ("cpu", ("TotalJobCPU", "AvgEventCPU", "MaxEventTime",
                     "AvgEventTime", "MinEventCPU", "TotalEventCPU",
                     "TotalJobTime", "MinEventTime", "MaxEventCPU")),
        )

        for stepName in report.listSteps():
            step = report.retrieveStep(stepName)
            if not hasattr(step, "performance"):
                continue
            performance = step.performance

            # Add every missing section (presumably section_() creates the
            # attribute read below - confirm) and count the ones that were
            # already there.
            presentSections = 0
            for sectionName, _ in metricsBySection:
                if hasattr(performance, sectionName):
                    presentSections += 1
                else:
                    performance.section_(sectionName)
            # There's nothing to report, get out
            if not presentSections:
                continue

            package = {}
            package["jobId"] = "%s_%i" % (job["name"], job["retry_count"])
            package["taskId"] = self.taskPrefix + job["workflow"]
            package["stepName"] = stepName
            for sectionName, metricNames in metricsBySection:
                section = getattr(performance, sectionName)
                for metricName in metricNames:
                    # Metrics absent from the section are sent as None
                    package[metricName] = getattr(section, metricName, None)

            logging.debug("Sending performance info: %s" % str(package))
            result = apmonSend(
                taskid=package["taskId"],
                jobid=package["jobId"],
                params=package,
                logr=logging,
                apmonServer=self.serverreport,
            )

            if result != 0:
                msg = "Error %i sending info for completed job %s via UDP\n" % (result, job["name"])
                msg += "Ignoring"
                logging.error(msg)
                logging.debug("Package sent: %s\n" % package)
                logging.debug("Host info: host %s, port %s" % (self.destHost, self.destPort))
        apmonFree()

        return
예제 #25
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Handle the post-processing step information: send one package per
        step of a job to the Dashboard.

        :param job: mapping read for 'fwjr', 'name', 'retry_count' and
                    'workflow'; job['fwjr'] is either None or an object
                    providing listSteps() and retrieveStep(stepName)

        Nothing happens when job['fwjr'] is None. Steps without a 'counter'
        attribute, and steps that yield an empty package, are skipped. A
        failed send is logged and ignored.
        """
        report = job['fwjr']
        if report is None:
            return

        # Keys sent under their own name; all others get the step counter
        # as a prefix.
        unprefixedKeys = ('inputFiles', 'Basename', 'inputBlocks')

        for stepName in report.listSteps():
            step = report.retrieveStep(stepName)
            if not hasattr(step, 'counter'):
                continue

            counter = step.counter

            package = {}
            package.update(self.getPerformanceInformation(step))
            package.update(self.getEventInformation(stepName, report))

            # Input files should just be appended onto inputFiles instead of given a step #
            # per https://hypernews.cern.ch/HyperNews/CMS/get/comp-monitoring/326.html
            inputFilePackage = self.getInputFilesInformation(step)
            if inputFilePackage:
                if 'inputFiles' in package:
                    package['inputFiles'] += ';' + inputFilePackage['inputFiles']
                else:
                    # Reuse the result fetched above instead of asking for
                    # the same information a second time.
                    package.update(inputFilePackage)

            trimmedPackage = {}
            for key, value in package.items():
                if key in unprefixedKeys:
                    trimmedPackage[key] = value
                elif value is not None:
                    # None values are dropped for the prefixed keys
                    trimmedPackage['%d_%s' % (counter, key)] = value
            package = trimmedPackage

            if not package:
                continue

            package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
            package['taskId'] = self.taskPrefix + job['workflow']
            package['%d_stepName' % counter] = stepName

            logging.debug("Sending step info: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg = "Error %i sending info for completed job %s via UDP\n" \
                      % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                logging.debug("Package sent: %s\n" % package)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
            apmonFree()

        return
예제 #26
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Handle the post-processing step information.

        For every step of job['fwjr'] that has a 'counter' attribute, the
        performance, event and input-file information is merged into one
        package and sent to the Dashboard.

        :param job: mapping read for 'fwjr', 'name', 'retry_count' and
                    'workflow'; job['fwjr'] may be None, in which case
                    nothing is done

        Steps whose package ends up empty are not sent. A non-zero result
        from apmonSend is logged and ignored.
        """
        fwjr = job['fwjr']
        if fwjr is None:
            return

        for stepName in fwjr.listSteps():
            step = fwjr.retrieveStep(stepName)
            if not hasattr(step, 'counter'):
                continue

            counter = step.counter

            package = {}
            package.update(self.getPerformanceInformation(step))
            package.update(self.getEventInformation(stepName, fwjr))

            # Input files should just be appended onto inputFiles instead of given a step #
            # per https://hypernews.cern.ch/HyperNews/CMS/get/comp-monitoring/326.html
            inputFilePackage = self.getInputFilesInformation(step)
            if inputFilePackage:
                if 'inputFiles' in package:
                    package['inputFiles'] += ';' + inputFilePackage['inputFiles']
                else:
                    # The information was already fetched above; do not
                    # call getInputFilesInformation a second time.
                    package.update(inputFilePackage)

            trimmedPackage = {}
            for key in package:
                value = package[key]
                if key in ['inputFiles', 'Basename', 'inputBlocks']:
                    # These keys are sent without the step counter prefix
                    trimmedPackage[key] = value
                elif value is not None:
                    trimmedPackage['%d_%s' % (counter, key)] = value
            package = trimmedPackage

            if not package:
                continue

            package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
            package['taskId'] = self.taskPrefix + job['workflow']
            package['%d_stepName' % counter] = stepName

            logging.debug("Sending step info: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg = "Error %i sending info for completed job %s via UDP\n" \
                      % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                logging.debug("Package sent: %s\n" % package)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
            apmonFree()

        return