Example #1
    def generateUsageRecords(self, hostname, user_map, project_map):
        """
        Starts the UR generation process.
        """
        self.missing_user_mappings = {}
        
        # Creates 5 Usage Record objects
        for count in [1,2,3,4,5]:
            self.state = self.state + 1
            
            # create some data at random...
            job_id       = str(self.state)      
            account_name = 'default'
            user_name    = 'default'
            submit_time  = time.mktime(common.datetimeFromIsoStr('2012-01-01T00:00:00').timetuple())
            start_time   = time.mktime(common.datetimeFromIsoStr('2012-01-02T01:23:45').timetuple())
            end_time     = time.mktime(common.datetimeFromIsoStr('2012-01-02T02:34:56').timetuple())

            # clean data and create various composite entries from the work load trace
            fqdn_job_id = hostname + ':' + job_id
            if self.idtimestamp:
                record_id_timestamp = re.sub("[-:TZ]","",usagerecord.epoch2isoTime(start_time)) # remove characters
                record_id = fqdn_job_id + ':' + record_id_timestamp
            else:
                record_id = fqdn_job_id

            if user_name not in user_map.getMapping():
                self.missing_user_mappings[user_name] = True

            vo_info = []
            if account_name is not None:
                mapped_project = project_map.get(account_name)
                if mapped_project is not None:
                    voi = usagerecord.VOInformation()
                    voi.type = 'lrmsurgen-projectmap'
                    voi.name = mapped_project
                    vo_info = [voi]

            ## fill in usage record fields
            ur = usagerecord.UsageRecord()
            ur.record_id        = record_id
            ur.local_job_id     = job_id
            ur.global_job_id    = fqdn_job_id
            ur.local_user_id    = user_name
            ur.global_user_name = user_map.get(user_name)
            ur.machine_name     = hostname
            ur.queue            = 'default'
            ur.processors       = 1
            ur.node_count       = 1
            ur.host             = hostname
            ur.submit_time      = usagerecord.epoch2isoTime(submit_time)
            ur.start_time       = usagerecord.epoch2isoTime(start_time)
            ur.end_time         = usagerecord.epoch2isoTime(end_time)
            ur.cpu_duration     = 90
            ur.wall_duration    = 100
            ur.project_name     = account_name
            ur.vo_info         += vo_info

            common.writeUr(ur,self.cfg)
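
The record id built above only needs the start-time string with its separators stripped. A minimal stand-alone illustration (the ISO string and the fqdn_job_id value are made up here; the real ones come from usagerecord.epoch2isoTime and the hostname/job id):

import re

iso_start = '2012-01-02T01:23:45Z'                       # assumed output format of epoch2isoTime
record_id_timestamp = re.sub("[-:TZ]", "", iso_start)    # '20120102012345'
record_id = 'host.example.org:17' + ':' + record_id_timestamp
# -> 'host.example.org:17:20120102012345'
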
Example #2
 def createUsageRecord(self, log_entry, hostname, user_map, vo_map, maui_server_host):
     """
     Creates a Usage Record object given a Maui log entry.
     """
 
     # extract data from the workload trace (log_entry)
 
     job_id       = log_entry[0]
     user_name    = log_entry[3]
     req_class    = log_entry[7]
     submit_time  = int(log_entry[8])
     start_time   = int(log_entry[10])
     end_time     = int(log_entry[11])
     alo_tasks    = int(log_entry[21])
     account_name = log_entry[25]
     utilized_cpu = float(log_entry[29])
     core_count   = int(log_entry[31])*alo_tasks
     hosts        = log_entry[37].split(':')
 
     # clean data and create various composite entries from the work load trace
 
     if job_id.isdigit() and maui_server_host is not None:
         job_identifier = job_id + '.' + maui_server_host
     else:
         job_identifier = job_id
     fqdn_job_id = hostname + ':' + job_identifier
 
     if user_name not in user_map:
         self.missing_user_mappings[user_name] = True
 
     queue = req_class.replace('[','').replace(']','')
     if ':' in queue:
         queue = queue.split(':')[0]
 
     if account_name == '[NONE]':
         account_name = None
 
     mapped_vo = None
     if account_name is not None:
         mapped_vo = vo_map.get(account_name)
     if mapped_vo is None:
         mapped_vo = vo_map.get(user_name)
 
     vo_info = []
     if mapped_vo is not None:
         voi = usagerecord.VOInformation(name=mapped_vo, type_='bart-vomap')
         vo_info = [voi]
 
     wall_time = end_time - start_time
 
     # okay, this is somewhat ridiculous and complicated:
     # When compiled on Linux, Maui assumes it will only get a cputime reading from
     # the master node and compensates by multiplying the utilized cpu field by the
     # number of tasks. However, most newer torque installations already report the
     # correct cpu utilization, so the combination produces abnormally high cpu time
     # values for parallel jobs. The following heuristic tries to compensate by
     # checking whether the cpu time exceeds wall_time * cpus (which it never should)
     # and, if so, correcting the number. Note that this will not work for jobs with
     # very low efficiency.
 
     if utilized_cpu > wall_time * alo_tasks:
         utilized_cpu /= alo_tasks
 
     ## fill in usage record fields
 
     ur = usagerecord.UsageRecord()
 
     ur.record_id = fqdn_job_id
 
     ur.local_job_id = job_identifier
     ur.global_job_id = fqdn_job_id
 
     ur.local_user_id = user_name
     ur.global_user_name = user_map.get(user_name)
 
     ur.machine_name = hostname
     ur.queue = queue
 
     ur.processors = core_count
     ur.node_count = len(hosts)
     ur.host = ','.join(hosts)
 
     ur.submit_time = usagerecord.epoch2isoTime(submit_time)
     ur.start_time  = usagerecord.epoch2isoTime(start_time)
     ur.end_time    = usagerecord.epoch2isoTime(end_time)
 
     ur.cpu_duration = utilized_cpu
     ur.wall_duration = wall_time
 
     ur.project_name = account_name
     ur.vo_info = vo_info
 
     return ur
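
The correction is easier to see with concrete numbers. A tiny illustration (the numbers are invented for the example):

wall_time, alo_tasks = 3600, 4              # a one-hour job running 4 tasks
utilized_cpu = 15000.0                      # inflated by the double counting described above
if utilized_cpu > wall_time * alo_tasks:    # 15000 > 14400, which should be impossible
    utilized_cpu /= alo_tasks               # corrected to 3750.0
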
Example #3
File: slurm.py Project: paran1/bart
    def createUsageRecord(self, log_entry, hostname, user_map, project_map):
        """
        Creates a Usage Record object given a slurm log entry.
        """
        
        if log_entry[1] == '' or log_entry[2] == '':
            return None

        # extract data from the workload trace (log_entry)
        job_id       = str(log_entry[0])
        user_name    = getpwuid(int(log_entry[1]))[0]
        queue        = log_entry[2]
        submit_time  = time.mktime(common.datetimeFromIsoStr(log_entry[3]).timetuple())
        start_time   = time.mktime(common.datetimeFromIsoStr(log_entry[4]).timetuple())
        end_time     = time.mktime(common.datetimeFromIsoStr(log_entry[5]).timetuple())
        account_name = log_entry[6]
        utilized_cpu = common.getSeconds(log_entry[8])
        wall_time    = common.getSeconds(log_entry[7])
        core_count   = int(log_entry[9])
        hosts        = self.getNodes(log_entry[10])

        # clean data and create various composite entries from the work load trace
        job_identifier = job_id
        fqdn_job_id = hostname + ':' + job_id
        if self.idtimestamp:
            record_id_timestamp = re.sub("[-:TZ]","",usagerecord.epoch2isoTime(start_time)) # remove characters
            record_id = fqdn_job_id + ':' + record_id_timestamp
        else:
            record_id = fqdn_job_id

        if user_name not in user_map.getMapping():
            self.missing_user_mappings[user_name] = True

        vo_info = []
        if account_name is not None:
            mapped_project = project_map.get(account_name)
            if mapped_project is not None:
                voi = usagerecord.VOInformation()
                voi.type = 'lrmsurgen-projectmap'
                voi.name = mapped_project
                vo_info = [voi]

        ## fill in usage record fields
        ur = usagerecord.UsageRecord()
        ur.record_id        = record_id
        ur.local_job_id     = job_identifier
        ur.global_job_id    = fqdn_job_id
        ur.local_user_id    = user_name
        ur.global_user_name = user_map.get(user_name)
        ur.machine_name     = hostname
        ur.queue            = queue
        ur.processors       = core_count
        ur.node_count       = len(hosts)
        ur.host             = ','.join(hosts)
        ur.submit_time      = usagerecord.epoch2isoTime(submit_time)
        ur.start_time       = usagerecord.epoch2isoTime(start_time)
        ur.end_time         = usagerecord.epoch2isoTime(end_time)
        ur.cpu_duration     = utilized_cpu
        ur.wall_duration    = wall_time
        ur.project_name     = account_name
        ur.vo_info         += vo_info

        return ur
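
common.getSeconds turns the sacct duration fields into plain seconds. A hedged sketch of such a helper, assuming the usual '[days-]HH:MM:SS' sacct format (this is not necessarily the project's implementation):

def getSeconds(duration):
    # split off an optional day prefix, e.g. '1-02:03:04'
    days = 0
    if '-' in duration:
        day_part, duration = duration.split('-', 1)
        days = int(day_part)
    hours, minutes, seconds = (int(x) for x in duration.split(':'))
    return ((days * 24 + hours) * 60 + minutes) * 60 + seconds

getSeconds('1-02:03:04')   # 93784
getSeconds('00:45:10')     # 2710
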
Example #4
    def createUsageRecord(self, log_entry, hostname, user_map, project_map):
        """
        Creates a Usage Record object given a slurm log entry.
        """

        if log_entry[1] == '' or log_entry[2] == '':
            return None

        # Transforms a string 'billing=5,cpu=2,mem=24G,node=1' into a dict
        # { 'billing': '5', 'cpu': '2', 'mem': '24G', 'node': '1' } (values stay strings)
        tres = log_entry[9]
        tresdict = dict(
            (k.strip(), v.strip())
            for k, v in (item.split('=')
                         for item in tres.split(','))) if tres else dict()

        # extract data from the workload trace (log_entry)
        job_id = str(log_entry[0])
        user_name = getpwuid(int(log_entry[1]))[0]
        queue = log_entry[2]
        submit_time = time.mktime(
            common.datetimeFromIsoStr(log_entry[3]).timetuple())
        start_time = time.mktime(
            common.datetimeFromIsoStr(log_entry[4]).timetuple())
        end_time = time.mktime(
            common.datetimeFromIsoStr(log_entry[5]).timetuple())
        account_name = log_entry[6]
        utilized_cpu = common.getSeconds(log_entry[8])
        wall_time = common.getSeconds(log_entry[7])
        processors = self.getProcessors(tresdict)
        charge = self.getCharge(tresdict, wall_time)
        hosts = self.getNodes(log_entry[10])
        nnodes = int(log_entry[11])

        # clean data and create various composite entries from the work load trace
        job_identifier = job_id
        fqdn_job_id = hostname + ':' + job_id
        if self.idtimestamp:
            record_id_timestamp = re.sub(
                "[-:TZ]", "",
                usagerecord.epoch2isoTime(start_time))  # remove characters
            record_id = fqdn_job_id + ':' + record_id_timestamp
        else:
            record_id = fqdn_job_id

        if user_name not in user_map.getMapping():
            self.missing_user_mappings[user_name] = True

        vo_info = []
        if account_name is not None:
            mapped_project = project_map.get(account_name)
            if mapped_project is not None:
                voi = usagerecord.VOInformation()
                voi.type = 'lrmsurgen-projectmap'
                voi.name = mapped_project
                vo_info = [voi]

        ## fill in usage record fields
        ur = usagerecord.UsageRecord()
        ur.record_id = record_id
        ur.local_job_id = job_identifier
        ur.global_job_id = fqdn_job_id
        ur.local_user_id = user_name
        ur.global_user_name = user_map.get(user_name)
        ur.machine_name = hostname
        ur.queue = queue
        ur.processors = processors
        ur.node_count = nnodes
        ur.host = ','.join(hosts)
        ur.submit_time = usagerecord.epoch2isoTime(submit_time)
        ur.start_time = usagerecord.epoch2isoTime(start_time)
        ur.end_time = usagerecord.epoch2isoTime(end_time)
        ur.cpu_duration = utilized_cpu
        ur.wall_duration = wall_time
        ur.project_name = account_name
        ur.vo_info += vo_info

        # Optional field:
        if charge is not None:
            ur.charge = charge

        return ur
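
getProcessors and getCharge are helpers on the parser class and are not shown in this excerpt. A hedged sketch of what they might look like on top of the tresdict built above, assuming the 'cpu' and 'billing' keys from the 'billing=5,cpu=2,mem=24G,node=1' example (hypothetical, not the project's actual code):

def getProcessors(tresdict):
    # allocated CPU count from AllocTRES; default to 1 if the key is missing
    return int(tresdict.get('cpu', 1))

def getCharge(tresdict, wall_time):
    # one plausible definition: billing weight accrued over the wall time;
    # return None when no billing weight is present, since charge is optional
    billing = tresdict.get('billing')
    if billing is None:
        return None
    return int(billing) * wall_time
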
Example #5
    def createUsageRecord(self, log_entry, hostname, user_map, vo_map):
        """
        Creates a Usage Record object given a Torque log entry.
        """

        # extract data from the workload trace (log_entry)
        job_id       = log_entry['jobid']
        user_name    = log_entry['user']
        queue        = log_entry['queue']
        account      = log_entry.get('account')
        submit_time  = int(log_entry['ctime'])
        start_time   = int(log_entry['start'])
        end_time     = int(log_entry['end'])
        utilized_cpu = self.getSeconds(log_entry['resources_used.cput'])
        wall_time    = self.getSeconds(log_entry['resources_used.walltime'])

        hosts = list(set([hc.split('/')[0] for hc in log_entry['exec_host'].split('+')]))

        # initial value
        node_count = len(hosts)

        if 'Resource_List.ncpus' in log_entry:
            core_count = int(log_entry['Resource_List.ncpus'])
        elif 'Resource_List.nodes' in log_entry:
            core_count = self.getCoreCount(log_entry['Resource_List.nodes'])
        # mppwidth is used on e.g. Cray machines instead of ncpus / nodes
        elif 'Resource_List.mppwidth' in log_entry or 'Resource_List.size' in log_entry:
            if 'Resource_List.mppwidth' in log_entry:
                core_count = int(log_entry['Resource_List.mppwidth'])
            # older versions on e.g. Cray machines use "size" as the keyword for mppwidth / core_count
            elif 'Resource_List.size' in log_entry:
                core_count = int(log_entry['Resource_List.size'])
            # get the node count; mppnodect exists only in newer versions
            if 'Resource_List.mppnodect' in log_entry:
                node_count = int(log_entry['Resource_List.mppnodect'])
            else:
                logging.warning('Missing mppnodect for entry: %s (will guess from "core count"/mppnppn)' % job_id)
                try:
                    node_count = core_count // int(log_entry['Resource_List.mppnppn'])
                except (KeyError, ValueError, ZeroDivisionError):
                    logging.warning('Unable to calculate node count for entry: %s (will guess from host list)' % job_id)
                    # keep the default of len(hosts) set above
        else:
            logging.warning('Missing processor count for entry: %s (will guess from host list)' % job_id)
            # assume the number of exec hosts is the core count (possibly not right)
            core_count = len(hosts)

        # clean data and create various composite entries from the work load trace
        if job_id.isdigit() and hostname is not None:
            job_identifier = job_id + '.' + hostname
        else:
            job_identifier = job_id
        fqdn_job_id = hostname + ':' + job_identifier

        if user_name not in user_map.getMapping():
            self.missing_user_mappings[user_name] = True

        vo_info = []
        if account:
            mapped_vo = vo_map.get(account)
        else:
            mapped_vo = vo_map.get(user_name)
        if mapped_vo is not None:
            voi = usagerecord.VOInformation(name=mapped_vo, type_='bart-vomap')
            vo_info.append(voi)

        ## fill in usage record fields
        ur = usagerecord.UsageRecord()
        ur.record_id        = fqdn_job_id
        ur.local_job_id     = job_identifier
        ur.global_job_id    = fqdn_job_id
        ur.local_user_id    = user_name
        ur.global_user_name = user_map.get(user_name)
        ur.machine_name     = hostname
        ur.queue            = queue
        ur.project_name     = account
        ur.processors       = core_count
        ur.node_count       = node_count
        ur.host             = ','.join(hosts)
        ur.submit_time      = usagerecord.epoch2isoTime(submit_time)
        ur.start_time       = usagerecord.epoch2isoTime(start_time)
        ur.end_time         = usagerecord.epoch2isoTime(end_time)
        ur.cpu_duration     = utilized_cpu
        ur.wall_duration    = wall_time
        ur.vo_info         += vo_info
        ur.exit_code        = log_entry['Exit_status']

        return ur
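
getCoreCount parses Torque's Resource_List.nodes specification. A hedged sketch of such a parser, assuming '+'-separated node specs like '2:ppn=8' or 'node01:ppn=4' (hypothetical, not the project's actual helper):

def getCoreCount(nodes_spec):
    cores = 0
    for spec in nodes_spec.split('+'):
        parts = spec.split(':')
        # the leading field is either a node count or a node name
        count = int(parts[0]) if parts[0].isdigit() else 1
        ppn = 1
        for prop in parts[1:]:
            if prop.startswith('ppn='):
                ppn = int(prop.split('=', 1)[1])
        cores += count * ppn
    return cores

getCoreCount('2:ppn=8+node01:ppn=4')   # 20
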
Example #6
    def createUsageRecord(self, log_entry, hostname, user_map, project_map):
        """
        Creates a Usage Record object given a slurm log entry.
        """

        if log_entry[1] == '' or log_entry[2] == '':
            return None

        # extract data from the workload trace (log_entry)
        job_id = str(log_entry[0])
        user_name = getpwuid(int(log_entry[1]))[0]
        queue = log_entry[2]
        submit_time = time.mktime(
            common.datetimeFromIsoStr(log_entry[3]).timetuple())
        start_time = time.mktime(
            common.datetimeFromIsoStr(log_entry[4]).timetuple())
        end_time = time.mktime(
            common.datetimeFromIsoStr(log_entry[5]).timetuple())
        account_name = log_entry[6]
        utilized_cpu = common.getSeconds(log_entry[8])
        wall_time = common.getSeconds(log_entry[7])
        core_count = self.extractBillingUnit(log_entry[9])
        hosts = self.getNodes(log_entry[10])

        # clean data and create various composite entries from the work load trace
        job_identifier = job_id
        fqdn_job_id = hostname + ':' + job_id
        if self.idtimestamp:
            record_id_timestamp = re.sub(
                "[-:TZ]", "",
                usagerecord.epoch2isoTime(start_time))  # remove characters
            record_id = fqdn_job_id + ':' + record_id_timestamp
        else:
            record_id = fqdn_job_id

        if user_name not in user_map.getMapping():
            self.missing_user_mappings[user_name] = True

        vo_info = []
        if account_name is not None:
            mapped_project = project_map.get(account_name)
            if mapped_project is not None:
                voi = usagerecord.VOInformation()
                voi.type = 'lrmsurgen-projectmap'
                voi.name = mapped_project
                vo_info = [voi]

        ## fill in usage record fields
        ur = usagerecord.UsageRecord()
        ur.record_id = record_id
        ur.local_job_id = job_identifier
        ur.global_job_id = fqdn_job_id
        ur.local_user_id = user_name
        ur.global_user_name = user_map.get(user_name)
        ur.machine_name = hostname
        ur.queue = queue
        ur.processors = core_count
        ur.node_count = len(hosts)
        ur.host = ','.join(hosts)
        ur.submit_time = usagerecord.epoch2isoTime(submit_time)
        ur.start_time = usagerecord.epoch2isoTime(start_time)
        ur.end_time = usagerecord.epoch2isoTime(end_time)
        ur.cpu_duration = utilized_cpu
        ur.wall_duration = wall_time
        ur.project_name = account_name
        ur.vo_info += vo_info

        return ur
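
extractBillingUnit reads the billing weight out of the AllocTRES string already seen in Example #4. A hedged sketch, assuming the 'billing=5,cpu=2,mem=24G,node=1' format and falling back to the cpu count (hypothetical, not the project's actual helper):

def extractBillingUnit(tres):
    tresdict = dict(item.split('=', 1) for item in tres.split(',')) if tres else {}
    return int(tresdict.get('billing', tresdict.get('cpu', 1)))

extractBillingUnit('billing=5,cpu=2,mem=24G,node=1')   # 5
extractBillingUnit('')                                 # 1
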
Example #7
    def generateUsageRecords(self, hostname, user_map, project_map):
        """
        Starts the UR generation process.
        """
        self.missing_user_mappings = {}

        # Creates 5 Usage Record objects
        for count in [1, 2, 3, 4, 5]:
            self.state = self.state + 1

            # create some data at random...
            job_id = str(self.state)
            account_name = 'default'
            user_name = 'default'
            submit_time = time.mktime(
                common.datetimeFromIsoStr('2012-01-01T00:00:00').timetuple())
            start_time = time.mktime(
                common.datetimeFromIsoStr('2012-01-02T01:23:45').timetuple())
            end_time = time.mktime(
                common.datetimeFromIsoStr('2012-01-02T02:34:56').timetuple())

            # clean data and create various composite entries from the work load trace
            fqdn_job_id = hostname + ':' + job_id
            if self.idtimestamp:
                record_id_timestamp = re.sub(
                    "[-:TZ]", "",
                    usagerecord.epoch2isoTime(start_time))  # remove characters
                record_id = fqdn_job_id + ':' + record_id_timestamp
            else:
                record_id = fqdn_job_id

            if user_name not in user_map.getMapping():
                self.missing_user_mappings[user_name] = True

            vo_info = []
            if account_name is not None:
                mapped_project = project_map.get(account_name)
                if mapped_project is not None:
                    voi = usagerecord.VOInformation()
                    voi.type = 'lrmsurgen-projectmap'
                    voi.name = mapped_project
                    vo_info = [voi]

            ## fill in usage record fields
            ur = usagerecord.UsageRecord()
            ur.record_id = record_id
            ur.local_job_id = job_id
            ur.global_job_id = fqdn_job_id
            ur.local_user_id = user_name
            ur.global_user_name = user_map.get(user_name)
            ur.machine_name = hostname
            ur.queue = 'default'
            ur.processors = 1
            ur.node_count = 1
            ur.host = hostname
            ur.submit_time = usagerecord.epoch2isoTime(submit_time)
            ur.start_time = usagerecord.epoch2isoTime(start_time)
            ur.end_time = usagerecord.epoch2isoTime(end_time)
            ur.cpu_duration = 90
            ur.wall_duration = 100
            ur.project_name = account_name
            ur.vo_info += vo_info

            common.writeUr(ur, self.cfg)