def generateUsageRecords(self, hostname, user_map, project_map):
    """
    Starts the UR generation process.
    """
    self.missing_user_mappings = {}

    # Create five UsageRecord objects
    for count in [1, 2, 3, 4, 5]:
        self.state = self.state + 1

        # create some data at random...
        job_id       = str(self.state)
        account_name = 'default'
        user_name    = 'default'
        submit_time  = time.mktime(common.datetimeFromIsoStr('2012-01-01T00:00:00').timetuple())
        start_time   = time.mktime(common.datetimeFromIsoStr('2012-01-02T01:23:45').timetuple())
        end_time     = time.mktime(common.datetimeFromIsoStr('2012-01-02T02:34:56').timetuple())

        # clean data and create various composite entries from the workload trace
        fqdn_job_id = hostname + ':' + job_id
        if self.idtimestamp:
            # strip separator characters so the timestamp can be used in the record id
            record_id_timestamp = re.sub("[-:TZ]", "", usagerecord.epoch2isoTime(start_time))
            record_id = fqdn_job_id + ':' + record_id_timestamp
        else:
            record_id = fqdn_job_id

        if user_name not in user_map.getMapping():
            self.missing_user_mappings[user_name] = True

        vo_info = []
        if account_name is not None:
            mapped_project = project_map.get(account_name)
            if mapped_project is not None:
                voi = usagerecord.VOInformation()
                voi.type = 'lrmsurgen-projectmap'
                voi.name = mapped_project
                vo_info = [voi]

        ## fill in usage record fields
        ur = usagerecord.UsageRecord()
        ur.record_id        = record_id
        ur.local_job_id     = job_id
        ur.global_job_id    = fqdn_job_id
        ur.local_user_id    = user_name
        ur.global_user_name = user_map.get(user_name)
        ur.machine_name     = hostname
        ur.queue            = 'default'
        ur.processors       = 1
        ur.node_count       = 1
        ur.host             = hostname
        ur.submit_time      = usagerecord.epoch2isoTime(submit_time)
        ur.start_time       = usagerecord.epoch2isoTime(start_time)
        ur.end_time         = usagerecord.epoch2isoTime(end_time)
        ur.cpu_duration     = 90
        ur.wall_duration    = 100
        ur.project_name     = account_name
        ur.vo_info          += vo_info

        common.writeUr(ur, self.cfg)
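# common and usagerecord are project modules that are not part of this excerpt. As a
# hedged illustration only (not the project's actual implementation), epoch2isoTime
# presumably renders an epoch timestamp in the ISO 8601 UTC form used for the record
# ids and time fields above. A minimal stand-in could look like this:

import time


def epoch2isoTime_sketch(epoch_seconds):
    """Format an epoch timestamp as e.g. '2012-01-02T01:23:45Z' (UTC). Illustration only."""
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(epoch_seconds))

assert epoch2isoTime_sketch(0) == '1970-01-01T00:00:00Z'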
def createUsageRecord(self, log_entry, hostname, user_map, vo_map, maui_server_host):
    """
    Creates a Usage Record object given a Maui log entry.
    """
    # extract data from the workload trace (log_entry)
    job_id       = log_entry[0]
    user_name    = log_entry[3]
    req_class    = log_entry[7]
    submit_time  = int(log_entry[8])
    start_time   = int(log_entry[10])
    end_time     = int(log_entry[11])
    alo_tasks    = int(log_entry[21])
    account_name = log_entry[25]
    utilized_cpu = float(log_entry[29])
    core_count   = int(log_entry[31]) * alo_tasks
    hosts        = log_entry[37].split(':')

    # clean data and create various composite entries from the workload trace
    if job_id.isdigit() and maui_server_host is not None:
        job_identifier = job_id + '.' + maui_server_host
    else:
        job_identifier = job_id
    fqdn_job_id = hostname + ':' + job_identifier

    if user_name not in user_map:
        self.missing_user_mappings[user_name] = True

    queue = req_class.replace('[', '').replace(']', '')
    if ':' in queue:
        queue = queue.split(':')[0]

    if account_name == '[NONE]':
        account_name = None

    mapped_vo = None
    if account_name is not None:
        mapped_vo = vo_map.get(account_name)
    if mapped_vo is None:
        mapped_vo = vo_map.get(user_name)

    vo_info = []
    if mapped_vo is not None:
        voi = usagerecord.VOInformation(name=mapped_vo, type_='bart-vomap')
        vo_info = [voi]

    wall_time = end_time - start_time

    # Okay, this is somewhat ridiculous and complicated:
    # When compiled on Linux, Maui thinks it will only get a cputime reading from the
    # master node. To compensate, it multiplies the utilized cpu field by the number of
    # tasks. However, on most newer Torque installations the correct cpu utilization is
    # already reported, and the combination creates abnormally high cpu time values for
    # parallel jobs. The following heuristic tries to compensate by checking whether the
    # cpu time is higher than wall_time * cpus (which it never should be) and then
    # correcting the number. Note that this will not work for jobs with very low efficiency.
    if utilized_cpu > wall_time * alo_tasks:
        utilized_cpu /= alo_tasks

    ## fill in usage record fields
    ur = usagerecord.UsageRecord()
    ur.record_id        = fqdn_job_id
    ur.local_job_id     = job_identifier
    ur.global_job_id    = fqdn_job_id
    ur.local_user_id    = user_name
    ur.global_user_name = user_map.get(user_name)
    ur.machine_name     = hostname
    ur.queue            = queue
    ur.processors       = core_count
    ur.node_count       = len(hosts)
    ur.host             = ','.join(hosts)
    ur.submit_time      = usagerecord.epoch2isoTime(submit_time)
    ur.start_time       = usagerecord.epoch2isoTime(start_time)
    ur.end_time         = usagerecord.epoch2isoTime(end_time)
    ur.cpu_duration     = utilized_cpu
    ur.wall_duration    = wall_time
    ur.project_name     = account_name
    ur.vo_info          = vo_info

    return ur
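# Rough numeric illustration of the cpu-correction heuristic above (the values are
# invented for illustration only): with alo_tasks = 4 and wall_time = 1000 s, a Maui
# build that multiplies the master-node reading by the task count could report
# utilized_cpu = 12800 s, even though 4 cores * 1000 s = 4000 s is the physical maximum.
# The check utilized_cpu > wall_time * alo_tasks (12800 > 4000) triggers and the value
# is divided back down to 3200 s. The sketch below just mirrors that check.

def _corrected_cpu_sketch(utilized_cpu, wall_time, alo_tasks):
    """Mirror of the heuristic applied above; not new logic."""
    if utilized_cpu > wall_time * alo_tasks:
        utilized_cpu /= alo_tasks
    return utilized_cpu

assert _corrected_cpu_sketch(12800.0, 1000, 4) == 3200.0
assert _corrected_cpu_sketch(3200.0, 1000, 4) == 3200.0   # already sane, left untouched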
def createUsageRecord(self, log_entry, hostname, user_map, project_map):
    """
    Creates a Usage Record object given a slurm log entry.
    """
    if log_entry[1] == '' or log_entry[2] == '':
        return None

    # extract data from the workload trace (log_entry)
    job_id       = str(log_entry[0])
    user_name    = getpwuid(int(log_entry[1]))[0]
    queue        = log_entry[2]
    submit_time  = time.mktime(common.datetimeFromIsoStr(log_entry[3]).timetuple())
    start_time   = time.mktime(common.datetimeFromIsoStr(log_entry[4]).timetuple())
    end_time     = time.mktime(common.datetimeFromIsoStr(log_entry[5]).timetuple())
    account_name = log_entry[6]
    utilized_cpu = common.getSeconds(log_entry[8])
    wall_time    = common.getSeconds(log_entry[7])
    core_count   = log_entry[9]
    hosts        = self.getNodes(log_entry[10])

    # clean data and create various composite entries from the workload trace
    job_identifier = job_id
    fqdn_job_id = hostname + ':' + job_id
    if self.idtimestamp:
        # strip separator characters so the timestamp can be used in the record id
        record_id_timestamp = re.sub("[-:TZ]", "", usagerecord.epoch2isoTime(start_time))
        record_id = fqdn_job_id + ':' + record_id_timestamp
    else:
        record_id = fqdn_job_id

    if user_name not in user_map.getMapping():
        self.missing_user_mappings[user_name] = True

    vo_info = []
    if account_name is not None:
        mapped_project = project_map.get(account_name)
        if mapped_project is not None:
            voi = usagerecord.VOInformation()
            voi.type = 'lrmsurgen-projectmap'
            voi.name = mapped_project
            vo_info = [voi]

    ## fill in usage record fields
    ur = usagerecord.UsageRecord()
    ur.record_id        = record_id
    ur.local_job_id     = job_identifier
    ur.global_job_id    = fqdn_job_id
    ur.local_user_id    = user_name
    ur.global_user_name = user_map.get(user_name)
    ur.machine_name     = hostname
    ur.queue            = queue
    ur.processors       = core_count
    ur.node_count       = len(hosts)
    ur.host             = ','.join(hosts)
    ur.submit_time      = usagerecord.epoch2isoTime(submit_time)
    ur.start_time       = usagerecord.epoch2isoTime(start_time)
    ur.end_time         = usagerecord.epoch2isoTime(end_time)
    ur.cpu_duration     = utilized_cpu
    ur.wall_duration    = wall_time
    ur.project_name     = account_name
    ur.vo_info          += vo_info

    return ur
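# common.getSeconds and self.getNodes are project helpers that are not shown in this
# excerpt. As a hedged sketch (the exact duration formats accepted by the real helper
# are an assumption), getSeconds presumably turns sacct-style durations such as
# '1-02:03:04' or '02:03:04' into a number of seconds:

def getSeconds_sketch(duration):
    """Convert '[DD-]HH:MM:SS' (or 'MM:SS') into an integer number of seconds."""
    days = 0
    if '-' in duration:
        day_part, duration = duration.split('-', 1)
        days = int(day_part)
    parts = [int(p) for p in duration.split(':')]
    while len(parts) < 3:            # pad missing leading fields, e.g. 'MM:SS'
        parts.insert(0, 0)
    hours, minutes, seconds = parts
    return ((days * 24 + hours) * 60 + minutes) * 60 + seconds

assert getSeconds_sketch('1-02:03:04') == 93784
assert getSeconds_sketch('02:03:04') == 7384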
def createUsageRecord(self, log_entry, hostname, user_map, project_map):
    """
    Creates a Usage Record object given a slurm log entry.
    """
    if log_entry[1] == '' or log_entry[2] == '':
        return None

    # Transform a string like 'billing=5,cpu=2,mem=24G,node=1' into a dict
    # { 'billing': '5', 'cpu': '2', 'mem': '24G', 'node': '1' }
    tres = log_entry[9]
    tresdict = dict((k.strip(), v.strip())
                    for k, v in (item.split('=') for item in tres.split(','))) if tres else dict()

    # extract data from the workload trace (log_entry)
    job_id       = str(log_entry[0])
    user_name    = getpwuid(int(log_entry[1]))[0]
    queue        = log_entry[2]
    submit_time  = time.mktime(common.datetimeFromIsoStr(log_entry[3]).timetuple())
    start_time   = time.mktime(common.datetimeFromIsoStr(log_entry[4]).timetuple())
    end_time     = time.mktime(common.datetimeFromIsoStr(log_entry[5]).timetuple())
    account_name = log_entry[6]
    utilized_cpu = common.getSeconds(log_entry[8])
    wall_time    = common.getSeconds(log_entry[7])
    processors   = self.getProcessors(tresdict)
    charge       = self.getCharge(tresdict, wall_time)
    hosts        = self.getNodes(log_entry[10])
    nnodes       = int(log_entry[11])

    # clean data and create various composite entries from the workload trace
    job_identifier = job_id
    fqdn_job_id = hostname + ':' + job_id
    if self.idtimestamp:
        # strip separator characters so the timestamp can be used in the record id
        record_id_timestamp = re.sub("[-:TZ]", "", usagerecord.epoch2isoTime(start_time))
        record_id = fqdn_job_id + ':' + record_id_timestamp
    else:
        record_id = fqdn_job_id

    if user_name not in user_map.getMapping():
        self.missing_user_mappings[user_name] = True

    vo_info = []
    if account_name is not None:
        mapped_project = project_map.get(account_name)
        if mapped_project is not None:
            voi = usagerecord.VOInformation()
            voi.type = 'lrmsurgen-projectmap'
            voi.name = mapped_project
            vo_info = [voi]

    ## fill in usage record fields
    ur = usagerecord.UsageRecord()
    ur.record_id        = record_id
    ur.local_job_id     = job_identifier
    ur.global_job_id    = fqdn_job_id
    ur.local_user_id    = user_name
    ur.global_user_name = user_map.get(user_name)
    ur.machine_name     = hostname
    ur.queue            = queue
    ur.processors       = processors
    ur.node_count       = nnodes
    ur.host             = ','.join(hosts)
    ur.submit_time      = usagerecord.epoch2isoTime(submit_time)
    ur.start_time       = usagerecord.epoch2isoTime(start_time)
    ur.end_time         = usagerecord.epoch2isoTime(end_time)
    ur.cpu_duration     = utilized_cpu
    ur.wall_duration    = wall_time
    ur.project_name     = account_name
    ur.vo_info          += vo_info

    # Optional field:
    if charge is not None:
        ur.charge = charge

    return ur
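# self.getProcessors and self.getCharge are not part of this excerpt. A plausible
# (hypothetical) reading, given the TRES dict built above, is that the processor count
# comes from the 'cpu' entry and the charge from the 'billing' weight scaled by the
# wall time; treat both sketches as assumptions about the real helpers, not their code.

def getProcessors_sketch(tresdict):
    """Return the allocated CPU count from a TRES dict such as {'cpu': '2', ...}."""
    return int(tresdict.get('cpu', 0))


def getCharge_sketch(tresdict, wall_time):
    """Return billing-units * wall seconds, or None when no billing TRES is present."""
    billing = tresdict.get('billing')
    if billing is None:
        return None
    return int(billing) * wall_time

assert getProcessors_sketch({'billing': '5', 'cpu': '2', 'mem': '24G', 'node': '1'}) == 2
assert getCharge_sketch({'billing': '5'}, 3600) == 18000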
def createUsageRecord(self, log_entry, hostname, user_map, vo_map):
    """
    Creates a Usage Record object given a Torque log entry.
    """
    # extract data from the workload trace (log_entry)
    job_id       = log_entry['jobid']
    user_name    = log_entry['user']
    queue        = log_entry['queue']
    account      = log_entry.get('account')
    submit_time  = int(log_entry['ctime'])
    start_time   = int(log_entry['start'])
    end_time     = int(log_entry['end'])
    utilized_cpu = self.getSeconds(log_entry['resources_used.cput'])
    wall_time    = self.getSeconds(log_entry['resources_used.walltime'])
    hosts        = list(set([hc.split('/')[0] for hc in log_entry['exec_host'].split('+')]))

    # initial value
    node_count = len(hosts)

    if 'Resource_List.ncpus' in log_entry:
        core_count = int(log_entry['Resource_List.ncpus'])
    elif 'Resource_List.nodes' in log_entry:
        core_count = self.getCoreCount(log_entry['Resource_List.nodes'])
    # mppwidth is used on e.g. Cray machines instead of ncpus / nodes
    elif 'Resource_List.mppwidth' in log_entry or 'Resource_List.size' in log_entry:
        if 'Resource_List.mppwidth' in log_entry:
            core_count = int(log_entry['Resource_List.mppwidth'])
        # older versions on e.g. Cray machines use "size" as the keyword for mppwidth / core count
        elif 'Resource_List.size' in log_entry:
            core_count = int(log_entry['Resource_List.size'])
        # get node count; mppnodect exists only in newer versions
        if 'Resource_List.mppnodect' in log_entry:
            node_count = int(log_entry['Resource_List.mppnodect'])
        else:
            logging.warning('Missing mppnodect for entry: %s (will guess from "core count"/mppnppn)' % job_id)
            try:
                node_count = core_count // int(log_entry['Resource_List.mppnppn'])
            except:
                logging.warning('Unable to calculate node count for entry: %s (will guess from host list)' % job_id)
                # keep the default of len(hosts) given above
    else:
        logging.warning('Missing processor count for entry: %s (will guess from host list)' % job_id)
        # assume the number of exec hosts is the core count (possibly not right)
        core_count = len(hosts)

    # clean data and create various composite entries from the workload trace
    if job_id.isdigit() and hostname is not None:
        job_identifier = job_id + '.' + hostname
    else:
        job_identifier = job_id
    fqdn_job_id = hostname + ':' + job_identifier

    if user_name not in user_map.getMapping():
        self.missing_user_mappings[user_name] = True

    vo_info = []
    if account:
        mapped_vo = vo_map.get(account)
    else:
        mapped_vo = vo_map.get(user_name)
    if mapped_vo is not None:
        voi = usagerecord.VOInformation(name=mapped_vo, type_='bart-vomap')
        vo_info.append(voi)

    ## fill in usage record fields
    ur = usagerecord.UsageRecord()
    ur.record_id        = fqdn_job_id
    ur.local_job_id     = job_identifier
    ur.global_job_id    = fqdn_job_id
    ur.local_user_id    = user_name
    ur.global_user_name = user_map.get(user_name)
    ur.machine_name     = hostname
    ur.queue            = queue
    ur.project_name     = account
    ur.processors       = core_count
    ur.node_count       = node_count
    ur.host             = ','.join(hosts)
    ur.submit_time      = usagerecord.epoch2isoTime(submit_time)
    ur.start_time       = usagerecord.epoch2isoTime(start_time)
    ur.end_time         = usagerecord.epoch2isoTime(end_time)
    ur.cpu_duration     = utilized_cpu
    ur.wall_duration    = wall_time
    ur.vo_info          += vo_info
    ur.exit_code        = log_entry['Exit_status']

    return ur
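# self.getSeconds and self.getCoreCount are project helpers outside this excerpt. As an
# illustration only (the node-spec grammar actually handled by the real helper is an
# assumption), a Resource_List.nodes value such as '2:ppn=8+1:ppn=4' could be reduced
# to a core count by summing nodes * ppn over the '+'-separated chunks:

def getCoreCount_sketch(nodes_spec):
    """Return total cores for a spec like '2:ppn=8+1:ppn=4' (ppn defaults to 1)."""
    total = 0
    for chunk in nodes_spec.split('+'):
        parts = chunk.split(':')
        count = int(parts[0]) if parts[0].isdigit() else 1   # named node -> one node
        ppn = 1
        for prop in parts[1:]:
            if prop.startswith('ppn='):
                ppn = int(prop.split('=', 1)[1])
        total += count * ppn
    return total

assert getCoreCount_sketch('2:ppn=8+1:ppn=4') == 20
assert getCoreCount_sketch('node01:ppn=2') == 2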
def createUsageRecord(self, log_entry, hostname, user_map, project_map):
    """
    Creates a Usage Record object given a slurm log entry.
    """
    if log_entry[1] == '' or log_entry[2] == '':
        return None

    # extract data from the workload trace (log_entry)
    job_id       = str(log_entry[0])
    user_name    = getpwuid(int(log_entry[1]))[0]
    queue        = log_entry[2]
    submit_time  = time.mktime(common.datetimeFromIsoStr(log_entry[3]).timetuple())
    start_time   = time.mktime(common.datetimeFromIsoStr(log_entry[4]).timetuple())
    end_time     = time.mktime(common.datetimeFromIsoStr(log_entry[5]).timetuple())
    account_name = log_entry[6]
    utilized_cpu = common.getSeconds(log_entry[8])
    wall_time    = common.getSeconds(log_entry[7])
    core_count   = self.extractBillingUnit(log_entry[9])
    hosts        = self.getNodes(log_entry[10])

    # clean data and create various composite entries from the workload trace
    job_identifier = job_id
    fqdn_job_id = hostname + ':' + job_id
    if self.idtimestamp:
        # strip separator characters so the timestamp can be used in the record id
        record_id_timestamp = re.sub("[-:TZ]", "", usagerecord.epoch2isoTime(start_time))
        record_id = fqdn_job_id + ':' + record_id_timestamp
    else:
        record_id = fqdn_job_id

    if user_name not in user_map.getMapping():
        self.missing_user_mappings[user_name] = True

    vo_info = []
    if account_name is not None:
        mapped_project = project_map.get(account_name)
        if mapped_project is not None:
            voi = usagerecord.VOInformation()
            voi.type = 'lrmsurgen-projectmap'
            voi.name = mapped_project
            vo_info = [voi]

    ## fill in usage record fields
    ur = usagerecord.UsageRecord()
    ur.record_id        = record_id
    ur.local_job_id     = job_identifier
    ur.global_job_id    = fqdn_job_id
    ur.local_user_id    = user_name
    ur.global_user_name = user_map.get(user_name)
    ur.machine_name     = hostname
    ur.queue            = queue
    ur.processors       = core_count
    ur.node_count       = len(hosts)
    ur.host             = ','.join(hosts)
    ur.submit_time      = usagerecord.epoch2isoTime(submit_time)
    ur.start_time       = usagerecord.epoch2isoTime(start_time)
    ur.end_time         = usagerecord.epoch2isoTime(end_time)
    ur.cpu_duration     = utilized_cpu
    ur.wall_duration    = wall_time
    ur.project_name     = account_name
    ur.vo_info          += vo_info

    return ur
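# extractBillingUnit is defined elsewhere in the module. A hedged guess at its intent,
# given that it replaces the raw core count in this version of the function: pick the
# 'billing' weight out of the AllocTRES string and fall back to 'cpu' when no billing
# weight is set. This sketch is an assumption about the helper, not its implementation.

def extractBillingUnit_sketch(tres):
    """Return billing (or cpu) units from a TRES string like 'billing=5,cpu=2,mem=24G,node=1'."""
    if not tres:
        return 0
    tresdict = dict(item.split('=', 1) for item in tres.split(','))
    return int(tresdict.get('billing', tresdict.get('cpu', 0)))

assert extractBillingUnit_sketch('billing=5,cpu=2,mem=24G,node=1') == 5
assert extractBillingUnit_sketch('cpu=2,node=1') == 2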