def deactivate_profile(self, job=None):
    """
    Clear the job's vnode ``current_eoe`` values and deactivate its profile.

    job -- the job to act on; when None, the job attached to the current
           PBS event (``pbs.event().job``) is used.

    If ``_running_excl(job)`` is true, ``current_eoe`` is set to None on
    every vnode named by ``_get_vnode_names(job)``.  The reset is best
    effort: a failure on one vnode does not stop the others.

    Returns whatever the backend's ``_deactivate_profile(job)`` returns.
    """
    self._check_pmi()

    if job is None:
        job = pbs.event().job

    if _running_excl(job):
        pbs.logjobmsg(job.id, "PMI: reset current_eoe")
        for h in _get_vnode_names(job):
            try:
                pbs.event().vnode_list[h].current_eoe = None
            except Exception:
                # Best effort only: a vnode that cannot be reset (for
                # example one missing from the event's vnode_list) must
                # not prevent the backend deactivation below.  Catching
                # Exception rather than using a bare except lets
                # KeyboardInterrupt/SystemExit propagate.
                pass
    return self.__pmi._deactivate_profile(job)
def _activate_profile(self, profile_name, job):
    """
    Record the job's starting energy and apply any requested power caps.

    profile_name -- name of the profile being activated; only logged here.
    job          -- the PBS job being started.

    The value returned by ``job_energy`` (when not None) is written to
    the path given by ``energy_file(job)``.  If ``_running_excl(job)`` is
    true and the job requests ``pcap_node`` and/or ``pcap_accelerator``,
    a single ``set_power_cap`` command covering the job's nids is
    launched.

    Returns False when the job has no compute nodes, True otherwise.
    Exceptions from writing the energy file or from ``launch`` are not
    caught here and propagate to the caller.
    """
    pbs.logmsg(pbs.LOG_DEBUG, "Cray: %s activate '%s'" %
               (job.id, str(profile_name)))

    nids, cnt = nidlist(job)
    if cnt == 0:
        pbs.logjobmsg(job.id, "Cray: no compute nodes for power setting")
        return False

    energy = job_energy(job, nids, cnt)
    if energy is not None:
        # 'with' guarantees the handle is closed even if write() raises.
        with open(energy_file(job), "w") as f:
            f.write(str(energy))

    # If this is the only job, set nodes to capped power.
    if _running_excl(job):
        cmd = "set_power_cap --nids " + nids
        doit = False

        # (resource name, command-line option, log format) for each cap
        # the job may request; node is handled before accelerator.
        caps = (
            ('pcap_node', " --node ", "Cray: pcap node %d"),
            ('pcap_accelerator', " --accel ", "Cray: pcap accel %d"),
        )
        for resource, option, logfmt in caps:
            pcap = job.Resource_List[resource]
            if pcap is not None:
                pbs.logjobmsg(job.id, logfmt % pcap)
                cmd += option + str(pcap)
                doit = True

        if doit:
            launch(job.id, cmd)
        else:
            pbs.logjobmsg(job.id, "Cray: no power cap to set")

    return True
def _deactivate_profile(self, job):
    """
    Undo the job's power caps and record its final energy from RUR data.

    job -- the PBS job that is ending.

    Steps, in order:
      1. Remove the initial energy file (best effort).
      2. If ``_running_excl(job)`` is true and the job requested
         ``pcap_node`` and/or ``pcap_accelerator``, launch
         ``set_power_cap`` with a value of 0 for each (best effort).
      3. Read the file named by ``rur_file(job)``, sum ``energy_used``
         (logged as joules) over every "energy" plugin line, delete the
         file, and store the total converted to kWh in
         ``job.resources_used["energy"]`` unless an existing value is
         at least as large.

    Returns False when the job has no compute nodes, the RUR file cannot
    be opened, the RUR file is not owned by uid 0 or is world-writable,
    or no "energy" plugin line is present; True otherwise.
    """
    pbs.logmsg(pbs.LOG_DEBUG, "Cray: deactivate %s" % job.id)

    nids, cnt = nidlist(job)
    if cnt == 0:
        pbs.logjobmsg(job.id, "Cray: no compute nodes for power setting")
        return False

    # remove initial energy file
    try:
        os.unlink(energy_file(job))
    except Exception:
        # Best effort: the file may never have been written.
        pass

    # If this is the only job, undo any power cap we set.
    if _running_excl(job):
        cmd = "set_power_cap --nids " + nids
        doit = False

        # (resource name, reset option, log format) for each cap the job
        # may have requested; node is handled before accelerator.
        caps = (
            ('pcap_node', " --node 0", "Cray: remove pcap node %d"),
            ('pcap_accelerator', " --accel 0",
             "Cray: remove pcap accel %d"),
        )
        for resource, option, logfmt in caps:
            pcap = job.Resource_List[resource]
            if pcap is not None:
                pbs.logjobmsg(job.id, logfmt % pcap)
                cmd += option
                doit = True

        if doit:
            try:
                launch(job.id, cmd)
            except Exception:
                # Best effort: a failed cap removal must not prevent the
                # energy accounting below.
                pass
        else:
            pbs.logjobmsg(job.id, "Cray: no power cap to remove")

    # Get final energy value from RUR data
    name = rur_file(job)
    try:
        rurfp = open(name, "r")
    except Exception:
        pbs.logjobmsg(job.id, "Cray: no RUR data")
        return False

    energy = 0
    seen = False        # track if energy plugin is seen
    # 'with' guarantees the handle is closed even if fstat or reading
    # a line raises.
    with rurfp:
        sbuf = os.fstat(rurfp.fileno())
        # Only trust a file owned by uid 0 that is not world-writable,
        # because its content is evaluated below.
        trusted = (sbuf.st_uid == 0) and not (sbuf.st_mode & stat.S_IWOTH)
        if not trusted:
            pbs.logjobmsg(job.id, "Cray: RUR file permission: %s" % name)
        else:
            pbs.logjobmsg(job.id, "Cray: reading RUR file: %s" % name)
            for line in rurfp:
                plugin, _, rest = line.partition(" : ")
                if plugin != "energy":  # check that the plugin is energy
                    continue
                apid, _, metstr = rest.partition(" : ")
                seen = True
                try:
                    # parse the metric list
                    # NOTE(review): eval() executes arbitrary expressions
                    # taken from the RUR file; the ownership/permission
                    # check above is the only guard.  If the metric list
                    # is always a plain literal, ast.literal_eval would be
                    # safer -- confirm the RUR output format first.
                    metlist = eval(metstr, {})
                    # metlist is a flat [key, value, key, value, ...]
                    # sequence; pair it up into a dict.
                    metrics = dict(metlist[i:i + 2]
                                   for i in range(0, len(metlist), 2))
                    joules = metrics["energy_used"]
                    energy += joules
                    pbs.logjobmsg(
                        job.id,
                        'Cray:RUR: {"apid":%s,"apid_energy":%dJ,'
                        '"job_energy":%dJ}' % (apid, joules, energy))
                except Exception as e:
                    pbs.logjobmsg(
                        job.id,
                        "Cray:RUR: energy_used not found: %s" % str(e))

    # The file is consumed (or rejected); remove it either way.
    os.unlink(name)
    if not trusted:
        return False

    if not seen:
        pbs.logjobmsg(job.id, "Cray:RUR: no energy plugin")
        return False

    old_energy = job.resources_used["energy"]
    new_energy = float(energy) / 3600000.0     # joules -> kWh
    if old_energy is None:
        pbs.logjobmsg(job.id, "Cray:RUR: energy %fkWh" % new_energy)
        job.resources_used["energy"] = new_energy
    elif new_energy > old_energy:
        pbs.logjobmsg(
            job.id,
            "Cray:RUR: energy %fkWh replaces periodic energy %fkWh" %
            (new_energy, old_energy))
        job.resources_used["energy"] = new_energy
    else:
        # Keep the periodic value when the RUR total is not larger.
        pbs.logjobmsg(
            job.id,
            "Cray:RUR: energy %fkWh last periodic usage %fkWh" %
            (new_energy, old_energy))
    return True
def _deactivate_profile(self, job): pbs.logmsg(pbs.LOG_DEBUG, "Cray: deactivate %s" % job.id) nids, cnt = nidlist(job) if cnt == 0: pbs.logjobmsg(job.id, "Cray: no compute nodes for power setting") return False # remove initial energy file try: os.unlink(energy_file(job)) except Exception: pass # If this is the only job, undo any power cap we set. if _running_excl(job): cmd = "set_power_cap --nids " + nids doit = False pcap = job.Resource_List['pcap_node'] if pcap is not None: pbs.logjobmsg(job.id, "Cray: remove pcap node %d" % pcap) cmd += " --node 0" doit = True pcap = job.Resource_List['pcap_accelerator'] if pcap is not None: pbs.logjobmsg(job.id, "Cray: remove pcap accel %d" % pcap) cmd += " --accel 0" doit = True if doit: try: launch(job.id, cmd) except Exception: pass else: pbs.logjobmsg(job.id, "Cray: no power cap to remove") # Get final energy value from RUR data name = rur_file(job) try: rurfp = open(name, "r") except Exception: pbs.logjobmsg(job.id, "Cray: no RUR data") return False sbuf = os.fstat(rurfp.fileno()) if (sbuf.st_uid != 0) or (sbuf.st_mode & stat.S_IWOTH): pbs.logjobmsg(job.id, "Cray: RUR file permission: %s" % name) rurfp.close() os.unlink(name) return False pbs.logjobmsg(job.id, "Cray: reading RUR file: %s" % name) energy = 0 seen = False # track if energy plugin is seen for line in rurfp: plugin, _, rest = line.partition(" : ") if plugin != "energy": # check that the plugin is energy continue apid, _, metstr = rest.partition(" : ") seen = True try: # parse the metric list metlist = eval(metstr, {}) metrics = dict(metlist[i:i + 2] for i in range(0, len(metlist), 2)) joules = metrics["energy_used"] energy += joules pbs.logjobmsg(job.id, 'Cray:RUR: {"apid":%s,"apid_energy":%dJ,"job_energy":%dJ}' % (apid, joules, energy)) except Exception, e: pbs.logjobmsg(job.id, "Cray:RUR: energy_used not found: %s" % str(e))