def pbs(host, kind):
    """Print the PBS records that ``pbs_mongo`` returns for the given hosts.

    :param host: host name to report on. ``None`` reports on every entry of
        ``pbs_mongo().hosts`` after the activation call.
    :param kind: ``"qstat"``, ``"queue"`` or ``"q"`` for the queue lookup,
        ``"nodes"`` for the node refresh. ``None`` runs ``"qstat"`` and
        ``"nodes"``. Any other value prints nothing.
    """
    _pbs = pbs_mongo()

    # The HPC login name is read from the cloudmesh configuration.
    config = cm_config()
    user = config.get("cloudmesh.hpc.username")

    _pbs.activate(host, user)
    print("ACTIVE PBS HOSTS {0}".format(_pbs.hosts))

    hosts = _pbs.hosts if host is None else [host]
    types = ['qstat', 'nodes'] if kind is None else [kind]

    for current_host in hosts:
        for current_kind in types:
            if current_kind in ("qstat", "queue", "q"):
                # NOTE(review): the queue kinds call get_pbsnodes, exactly as
                # before -- confirm that get_qstat was not intended here.
                entries = _pbs.get_pbsnodes(current_host)
            elif current_kind == "nodes":
                entries = _pbs.refresh_pbsnodes(current_host)
            else:
                # Unknown kind: nothing to look up, so nothing to print.
                continue
            # Each lookup prints its own result; no list is carried over
            # from a previous host or kind.
            for entry in entries:
                print("PBS -->")
                pprint(entry)
def refresh_qstat(hosts):
    """Refresh the stored qstat data of every host whose data is stale.

    For each host the stored records are read with ``pbs_mongo.get_qstat``.
    ``pbs_mongo.refresh_qstat`` is called when no record exists, or when the
    ``cm_refresh`` value of the first record differs from the current time
    by more than 30 seconds. A refresh that raises is reported on stdout and
    does not stop the remaining hosts.

    :param hosts: iterable of host names to check.
    """
    # Maximum age, in seconds, before the stored values are refreshed.
    max_time_diff_allowed = 30
    config = cm_config()
    user = config["cloudmesh"]["hpc"]["username"]
    pbs = pbs_mongo()
    print("task recieved")
    for host in hosts:
        now = datetime.datetime.now()
        data = pbs.get_qstat(host)
        if data.count() > 0:
            # total_seconds() measures the whole interval; the ``seconds``
            # attribute alone ignores full days, so day-old data could
            # look fresh.
            age = (now - data[0]["cm_refresh"]).total_seconds()
            # A timestamp lying in the future is treated as stale too.
            perform_refresh = age < 0 or age > max_time_diff_allowed
        else:
            perform_refresh = True

        if not perform_refresh:
            print("No refresh needed for {0}".format(host))
            continue

        print("Beginning refresh for {0}".format(host))
        pbs.activate(host, user)
        try:
            pbs.refresh_qstat(host)
        except Exception as e:
            # Surface the failure instead of collecting it in a string
            # that is never read.
            print("error {0} {1}".format(str(host), str(e)))
def display_mongo_qinfo_refresh(host=None):
    """Refresh the qinfo data of one host and flash the error if it fails.

    ``None`` and the bravo, echo and delta hosts are refreshed through
    ``india.futuregrid.org``; any other host name is used as given.

    :param host: host name to refresh, or ``None``.
    """
    log.info("qinfo refresh request {0}".format(host))
    user = cm_config()["cloudmesh"]["hpc"]["username"]

    handled_by_india = ("bravo.futuregrid.org",
                        "echo.futuregrid.org",
                        "delta.futuregrid.org")
    if host is None or host in handled_by_india:
        hosts = ["india.futuregrid.org"]
    else:
        hosts = [host]

    try:
        pbs = pbs_mongo()
        # The loop rebinds ``host`` on purpose: the flash category below
        # names the host that was being refreshed when the failure happened.
        for host in hosts:
            pbs.activate(host, user)
            pbs.refresh_qinfo(host)
    except Exception as e:
        print(traceback.format_exc())
        message = "{0}".format(e)
        log.error(message)
        flash(message, category=str("qinfo-{0}".format(host)))
def display_pbs_action(action, host):
    """Render the stored PBS node or queue data of a host.

    :param action: ``"nodes"`` renders ``mesh/cloud/mesh_pbsnodes.html`` with
        the ``"nodes"`` data; ``"queue"`` renders ``mesh/hpc/mesh_qstat.html``
        with the ``"qstat"`` data. Any other value renders ``error.html``.
    :param host: host name that is activated and whose data is read.
    :returns: the rendered template.
    """
    config = cm_config()
    user = config.get("cloudmesh.hpc.username")
    pbs = pbs_mongo()
    pbs.activate(host, user)
    time_now = datetime.now()

    # action -> (kind handed to pbs.get, template to render).
    # The template names are plain strings; the nodes entry used to carry a
    # trailing comma that turned it into a one-element tuple.
    views = {
        "nodes": ("nodes", 'mesh/cloud/mesh_pbsnodes.html'),
        "queue": ("qstat", 'mesh/hpc/mesh_qstat.html'),
    }
    if action not in views:
        return render_template('error.html',
                               updated=time_now,
                               error="",
                               type="Page not found",
                               msg="action {0} does not exist".format(action))

    kind, page = views[action]
    data = pbs.get(host, kind)
    return render_template(page,
                           updated=time_now,
                           host=host,
                           table_data=data)
def display_mongo_qinfo_refresh(host=None):
    """Trigger a qinfo refresh and flash the error message on failure.

    The refresh runs against ``india.futuregrid.org`` when no host is given
    or when the host is bravo, echo or delta; otherwise against ``host``.

    :param host: host name to refresh, or ``None``.
    """
    log.info("qinfo refresh request {0}".format(host))
    config = cm_config()
    user = config["cloudmesh"]["hpc"]["username"]

    india = 'india.futuregrid.org'
    aliases = ('bravo.futuregrid.org',
               'echo.futuregrid.org',
               'delta.futuregrid.org')
    use_india = host is None or host in aliases
    hosts = [india] if use_india else [host]

    try:
        pbs = pbs_mongo()
        # ``host`` is rebound by the loop so the category built in the
        # handler refers to the host that was in progress.
        for host in hosts:
            pbs.activate(host, user)
            pbs.refresh_qinfo(host)
    except Exception as e:
        print(traceback.format_exc())
        error = "{0}".format(e)
        log.error(error)
        category = "qinfo-{0}".format(host)
        flash(error, category=str(category))
def refresh_qstat(hosts):
    """Check the stored qstat data of each host and refresh it when stale.

    The stored records of a host are fetched with ``pbs_mongo.get_qstat``.
    The host is activated and ``pbs_mongo.refresh_qstat`` is called when
    there is no record, or when the ``cm_refresh`` value of the first record
    is more than 30 seconds away from the current time. An exception raised
    by a refresh is printed and the loop moves on to the next host.

    :param hosts: iterable of host names to check.
    """
    # Seconds of difference tolerated between the stored and current time.
    max_time_diff_allowed = 30
    config = cm_config()
    user = config["cloudmesh"]["hpc"]["username"]
    pbs = pbs_mongo()
    print("task recieved")
    for host in hosts:
        checked_at = datetime.datetime.now()
        data = pbs.get_qstat(host)

        perform_refresh = True
        if data.count() > 0:
            elapsed = checked_at - data[0]["cm_refresh"]
            # ``elapsed.seconds`` only holds the sub-day remainder, so the
            # full interval is taken from total_seconds() instead.
            age = elapsed.total_seconds()
            # Negative ages (timestamp ahead of the clock) count as stale.
            perform_refresh = age < 0 or age > max_time_diff_allowed

        if perform_refresh:
            print("Beginning refresh for {0}".format(host))
            pbs.activate(host, user)
            try:
                pbs.refresh_qstat(host)
            except Exception as e:
                # Report the failure; it used to be appended to a string
                # that nothing ever read.
                print("error {0} {1}".format(str(host), str(e)))
        else:
            print("No refresh needed for {0}".format(host))
def setup(self):
    """Prepare the fixture: read the user name, create and activate pbs_mongo.

    Sets ``self.user``, ``self.host`` and ``self.pbs`` and prints the hosts
    known to the ``pbs_mongo`` instance after activation.
    """
    hpc_settings = cm_config().get()["hpc"]
    self.user = hpc_settings["username"]
    self.host = "india.futuregrid.org"

    mongo = pbs_mongo()
    self.pbs = mongo
    # NOTE(review): activation uses a fixed account name rather than
    # self.user -- confirm this is intended.
    mongo.activate(self.host, "gvonlasz")
    print(" ".join(["SETUP PBS HOSTS", str(mongo.hosts)]))
def display_mongo_qstat_refresh(host=None):
    """Refresh the qstat data inline and show the error page on failure.

    Without a host the five default hosts are refreshed; bravo, echo and
    delta are refreshed through ``india.futuregrid.org``; any other host is
    refreshed directly.

    :param host: host name to refresh, or ``None`` for the default hosts.
    :returns: the rendered ``error.html`` template when a refresh raises;
        nothing is returned explicitly otherwise.
    """
    # Loaded first, as before; the value is not used further in this function.
    celery_config = ConfigDict(filename=config_file("/cloudmesh_celery.yaml"))
    log.info("qstat refresh request {0}".format(host))
    user = cm_config()["cloudmesh"]["hpc"]["username"]

    handled_by_india = ('bravo.futuregrid.org',
                        'echo.futuregrid.org',
                        'delta.futuregrid.org')
    if host is None:
        hosts = ["india.futuregrid.org",
                 "lima.futuregrid.org",
                 "sierra.futuregrid.org",
                 "hotel.futuregrid.org",
                 "alamo.futuregrid.org"]
    elif host in handled_by_india:
        hosts = ['india.futuregrid.org']
    else:
        hosts = [host]

    try:
        pbs = pbs_mongo()
        for target in hosts:
            pbs.activate(target, user)
            pbs.refresh_qstat(target)
    except Exception as e:
        print(traceback.format_exc())
        message = "{0}".format(e)
        log.error(message)
        return render_template('error.html',
                               error=message,
                               type="Some error in qstat",
                               msg="")
def display_mongo_qstat_refresh(host=None):
    """Run a qstat refresh for the selected hosts.

    ``host=None`` selects india, lima, sierra, hotel and alamo. The bravo,
    echo and delta hosts select india only. Every other value selects just
    that host.

    :param host: host name to refresh, or ``None``.
    :returns: the rendered ``error.html`` template if an exception is raised
        while refreshing; otherwise no explicit return value.
    """
    # The celery configuration is still read up front, although nothing
    # below uses it.
    celery_config = ConfigDict(filename=config_file("/cloudmesh_celery.yaml"))
    log.info("qstat refresh request {0}".format(host))
    config = cm_config()
    user = config["cloudmesh"]["hpc"]["username"]

    default_hosts = [
        "india.futuregrid.org",
        "lima.futuregrid.org",
        "sierra.futuregrid.org",
        "hotel.futuregrid.org",
        "alamo.futuregrid.org",
    ]
    india_aliases = (
        'bravo.futuregrid.org',
        'echo.futuregrid.org',
        'delta.futuregrid.org',
    )
    if host is None:
        hosts = default_hosts
    else:
        hosts = ['india.futuregrid.org'] if host in india_aliases else [host]

    try:
        pbs = pbs_mongo()
        for name in hosts:
            pbs.activate(name, user)
            pbs.refresh_qstat(name)
    except Exception as e:
        print(traceback.format_exc())
        error = "{0}".format(e)
        log.error(error)
        return render_template('error.html',
                               error=error,
                               type="Some error in qstat",
                               msg="")
def display_mongo_qstat_refresh(host=None):
    """Refresh the qstat data of one host and flash the error if it fails.

    ``None`` and the bravo, echo and delta hosts are refreshed through
    ``india.futuregrid.org``; any other host name is used as given.

    :param host: host name to refresh, or ``None``.
    """
    # Read first, as before; the configuration object is not used afterwards.
    celery_config = ConfigDict(filename=config_file("/cloudmesh_celery.yaml"))
    log.info("qstat refresh request {0}".format(host))
    user = cm_config()["cloudmesh"]["hpc"]["username"]

    handled_by_india = ('bravo.futuregrid.org',
                        'echo.futuregrid.org',
                        'delta.futuregrid.org')
    if host is None or host in handled_by_india:
        hosts = ["india.futuregrid.org"]
    else:
        hosts = [host]

    try:
        pbs = pbs_mongo()
        # ``host`` is rebound by the loop on purpose: the flash category in
        # the handler names the host that was being refreshed.
        for host in hosts:
            pbs.activate(host, user)
            log.info("refresh qstat: {0} {1}".format(host, user))
            pbs.refresh_qstat(host)
    except Exception as e:
        print(traceback.format_exc())
        message = "{0}".format(e)
        log.error(message)
        flash(message, category=str("qstat-{0}".format(host)))
def display_mongo_qstat_refresh(host=None):
    """Trigger a qstat refresh and flash the error message on failure.

    The refresh runs against ``india.futuregrid.org`` when no host is given
    or when the host is bravo, echo or delta; otherwise against ``host``.

    :param host: host name to refresh, or ``None``.
    """
    # The celery configuration is loaded but not consulted below.
    celery_config = ConfigDict(filename=config_file("/cloudmesh_celery.yaml"))
    log.info("qstat refresh request {0}".format(host))
    config = cm_config()
    user = config["cloudmesh"]["hpc"]["username"]

    india = 'india.futuregrid.org'
    aliases = ('bravo.futuregrid.org',
               'echo.futuregrid.org',
               'delta.futuregrid.org')
    use_india = host is None or host in aliases
    hosts = [india] if use_india else [host]

    try:
        pbs = pbs_mongo()
        # The loop variable reuses the name ``host`` so that the category
        # built in the handler refers to the host in progress.
        for host in hosts:
            pbs.activate(host, user)
            log.info("refresh qstat: {0} {1}".format(host, user))
            pbs.refresh_qstat(host)
    except Exception as e:
        print(traceback.format_exc())
        error = "{0}".format(e)
        log.error(error)
        category = "qstat-{0}".format(host)
        flash(error, category=str(category))
def display_mongo_qinfo_refresh(host=None):
    """Refresh the qinfo data and show the error page on failure.

    Without a host the five default hosts are refreshed; bravo, echo and
    delta are refreshed through ``india.futuregrid.org``; any other host is
    refreshed directly.

    :param host: host name to refresh, or ``None`` for the default hosts.
    :returns: the rendered ``error.html`` template when a refresh raises;
        nothing is returned explicitly otherwise.
    """
    log.info("qinfo refresh request {0}".format(host))
    user = cm_config()["cloudmesh"]["hpc"]["username"]

    handled_by_india = ('bravo.futuregrid.org',
                        'echo.futuregrid.org',
                        'delta.futuregrid.org')
    if host is None:
        hosts = ["india.futuregrid.org",
                 "lima.futuregrid.org",
                 "sierra.futuregrid.org",
                 "hotel.futuregrid.org",
                 "alamo.futuregrid.org"]
    elif host in handled_by_india:
        hosts = ['india.futuregrid.org']
    else:
        hosts = [host]

    try:
        pbs = pbs_mongo()
        for target in hosts:
            pbs.activate(target, user)
            pbs.refresh_qinfo(target)
    except Exception as e:
        print(traceback.format_exc())
        message = "{0}".format(e)
        log.error(message)
        return render_template('error.html',
                               error=message,
                               type="Some error in qinfo",
                               msg="")
def display_mongo_qinfo_refresh(host=None):
    """Run a qinfo refresh for the selected hosts.

    ``host=None`` selects india, lima, sierra, hotel and alamo. The bravo,
    echo and delta hosts select india only. Every other value selects just
    that host.

    :param host: host name to refresh, or ``None``.
    :returns: the rendered ``error.html`` template if an exception is raised
        while refreshing; otherwise no explicit return value.
    """
    log.info("qinfo refresh request {0}".format(host))
    config = cm_config()
    user = config["cloudmesh"]["hpc"]["username"]

    default_hosts = [
        "india.futuregrid.org",
        "lima.futuregrid.org",
        "sierra.futuregrid.org",
        "hotel.futuregrid.org",
        "alamo.futuregrid.org",
    ]
    india_aliases = (
        'bravo.futuregrid.org',
        'echo.futuregrid.org',
        'delta.futuregrid.org',
    )
    if host is None:
        hosts = default_hosts
    else:
        hosts = ['india.futuregrid.org'] if host in india_aliases else [host]

    try:
        pbs = pbs_mongo()
        for name in hosts:
            pbs.activate(name, user)
            pbs.refresh_qinfo(name)
    except Exception as e:
        print(traceback.format_exc())
        error = "{0}".format(e)
        log.error(error)
        return render_template('error.html',
                               error=error,
                               type="Some error in qinfo",
                               msg="")
def display_mongo_qstat_new():
    """Render the qstat page from the job records stored for each host.

    For every host the stored records are fetched with
    ``pbs_mongo.get_qstat``. The number of records and the ``cm_refresh``
    value of the first record are handed to the template. A host whose data
    cannot be read or counted is named in the ``error`` string and is shown
    with a job count of 0 and the current time.

    :returns: the rendered ``mesh/hpc/mesh_qstat.html`` template.
    """
    time_now = datetime.now()
    address_string = ""
    error = ""
    config = cm_config()
    pbs = pbs_mongo()
    hosts = ["india.futuregrid.org",
             "echo.futuregrid.org",
             "delta.futuregrid.org",
             "bravo.futuregrid.org"]

    data = {}
    jobcount = {}
    timer = {}
    for host in hosts:
        # Defaults that stay in place when the host cannot be queried, so
        # the lookups below never hit a missing key.
        timer[host] = datetime.now()
        jobcount[host] = 0

        try:
            data[host] = pbs.get_qstat(host)
        except Exception:
            log.error("get_qstat {0}".format(host))
            error += "get_qstat({0})".format(host)

        try:
            jobcount[host] = data[host].count()
        except Exception:
            # Also reached when get_qstat failed and data[host] is missing.
            error += "jobcount {0}".format(host)
            log.error("jobcount {0}".format(host))

        if jobcount[host] > 0:
            timer[host] = data[host][0]["cm_refresh"]

    # Presumably a column label followed by the key path into a job record;
    # verify against the template.
    attributes = {
        "pbs": [
            ["Queue", "queue"],
            ["State", "job_state"],
            ["Name", "Job_Name"],
            ["Owner", "Job_Owner"],
            ["NCpus", "Resource_List", "ncpus"],
            ["Walltime", "Resource_List", "walltime"],
            ["Nodes", "Resource_List", "nodes"],
            ["Nodect", "Resource_List", "nodect"],
            ["mtime", "mtime"],
            ["qtime", "qtime"],
            ["Used Cpu Time", "resources_used", "cput"],
            ["Used Mem ", "resources_used", "mem"],
            ["Used VMem ", "resources_used", "vmem"],
            ["Used Cpu Walltime", "resources_used", "walltime"],
        ]
    }

    return render_template(
        "mesh/hpc/mesh_qstat.html",
        hosts=hosts,
        jobcount=jobcount,
        timer=timer,
        address_string=address_string,
        attributes=attributes,
        updated=time_now,
        qstat=data,
        error=error,
        config=config,
    )
def display_mongo_qinfo():
    """Render the qinfo page from the queue records stored for each host.

    For every host the stored records are fetched with
    ``pbs_mongo.get_qinfo``. The number of records and the ``cm_refresh``
    value of the first record are handed to the template. A host whose data
    cannot be read or counted is named in the ``error`` string and is shown
    with a count of 0 and the current time.

    :returns: the rendered ``mesh/hpc/mesh_qinfo.html`` template.
    """
    time_now = datetime.now()
    address_string = ""
    error = ""
    config = cm_config()
    pbs = pbs_mongo()
    hosts = ["india.futuregrid.org",
             "echo.futuregrid.org",
             "delta.futuregrid.org",
             "bravo.futuregrid.org"]

    data = {}
    jobcount = {}
    timer = {}
    for host in hosts:
        # Defaults that stay in place when the host cannot be queried.
        timer[host] = datetime.now()
        jobcount[host] = 0

        # ``except Exception`` instead of a bare ``except`` keeps
        # KeyboardInterrupt and SystemExit from being swallowed.
        try:
            data[host] = pbs.get_qinfo(host)
        except Exception:
            log.error("get_qinfo {0}".format(host))
            error += "get_qinfo({0})".format(host)

        try:
            jobcount[host] = data[host].count()
            if jobcount[host] > 0:
                timer[host] = data[host][0]["cm_refresh"]
        except Exception:
            # Also reached when get_qinfo failed and data[host] is missing.
            error += "jobcount {0}".format(host)
            log.error("jobcount {0}".format(host))

    # Presumably a column label followed by the key path into a queue
    # record; verify against the template.
    attributes = {
        "pbs": [
            ["Queue", "queue"],
            ["State", "started"],
            ["Type", "queue_type"],
            ["Walltime", "resources_default_walltime"],
            ["Total", "total_jobs"],
            ["Exiting", "state_count", "Exiting"],
            ["Held", "state_count", "Held"],
            ["Queued", "state_count", "Queued"],
            ["Running", "state_count", "Running"],
            ["Transit", "state_count", "Transit"],
            ["Waiting", "state_count", "Waiting"],
        ]
    }

    return render_template(
        "mesh/hpc/mesh_qinfo.html",
        hosts=hosts,
        jobcount=jobcount,
        timer=timer,
        address_string=address_string,
        attributes=attributes,
        updated=time_now,
        qinfo=data,
        error=error,
        config=config,
    )
def display_mongo_qinfo():
    """Render the qinfo page for the eight configured hosts.

    The stored queue records of each host are read with
    ``pbs_mongo.get_qinfo``. The record count and the ``cm_refresh`` value
    of the first record are passed to the template. When reading or counting
    fails, the host is appended to the ``error`` string and keeps a count of
    0 and the current time.

    :returns: the rendered ``mesh/hpc/mesh_qinfo.html`` template.
    """
    time_now = datetime.now()
    address_string = ""
    error = ""
    config = cm_config()
    pbs = pbs_mongo()
    hosts = [
        "india.futuregrid.org",
        "echo.futuregrid.org",
        "delta.futuregrid.org",
        "bravo.futuregrid.org",
        "sierra.futuregrid.org",
        "hotel.futuregrid.org",
        "lima.futuregrid.org",
        "alamo.futuregrid.org",
    ]

    data = {}
    jobcount = {}
    timer = {}
    for host in hosts:
        # Fallback values for a host that cannot be queried.
        timer[host] = datetime.now()
        jobcount[host] = 0

        # Only Exception subclasses are handled, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        try:
            data[host] = pbs.get_qinfo(host)
        except Exception:
            log.error("get_qinfo {0}".format(host))
            error += "get_qinfo({0})".format(host)

        try:
            jobcount[host] = data[host].count()
            if jobcount[host] > 0:
                timer[host] = data[host][0]["cm_refresh"]
        except Exception:
            # Covers a missing data[host] after a failed get_qinfo as well.
            error += "jobcount {0}".format(host)
            log.error("jobcount {0}".format(host))

    # Presumably a column label followed by the key path into a queue
    # record; verify against the template.
    attributes = {
        "pbs": [
            ["Queue", "queue"],
            ["State", "started"],
            ["Type", "queue_type"],
            ["Walltime", "resources_default_walltime"],
            ["Total", "total_jobs"],
            ["Exiting", "state_count", "Exiting"],
            ["Held", "state_count", "Held"],
            ["Queued", "state_count", "Queued"],
            ["Running", "state_count", "Running"],
            ["Transit", "state_count", "Transit"],
            ["Waiting", "state_count", "Waiting"],
        ],
    }

    return render_template('mesh/hpc/mesh_qinfo.html',
                           hosts=hosts,
                           jobcount=jobcount,
                           timer=timer,
                           address_string=address_string,
                           attributes=attributes,
                           updated=time_now,
                           qinfo=data,
                           error=error,
                           config=config)
def display_mongo_qstat_new():
    """Render the qstat page for the eight configured hosts.

    The stored job records of each host are read with
    ``pbs_mongo.get_qstat``. The record count and the ``cm_refresh`` value
    of the first record are passed to the template. When reading or counting
    fails, the host is appended to the ``error`` string and keeps a job
    count of 0 and the current time.

    :returns: the rendered ``mesh/hpc/mesh_qstat.html`` template.
    """
    time_now = datetime.now()
    address_string = ""
    error = ""
    config = cm_config()
    pbs = pbs_mongo()
    hosts = [
        "india.futuregrid.org",
        "echo.futuregrid.org",
        "delta.futuregrid.org",
        "bravo.futuregrid.org",
        "sierra.futuregrid.org",
        "hotel.futuregrid.org",
        "lima.futuregrid.org",
        "alamo.futuregrid.org",
    ]

    data = {}
    jobcount = {}
    timer = {}
    for host in hosts:
        # Fallback values for a host that cannot be queried; they also
        # guarantee that jobcount[host] exists for the check below.
        timer[host] = datetime.now()
        jobcount[host] = 0

        try:
            data[host] = pbs.get_qstat(host)
        except Exception:
            log.error("get_qstat {0}".format(host))
            error += "get_qstat({0})".format(host)

        try:
            jobcount[host] = data[host].count()
        except Exception:
            # Covers a missing data[host] after a failed get_qstat as well.
            error += "jobcount {0}".format(host)
            log.error("jobcount {0}".format(host))

        if jobcount[host] > 0:
            timer[host] = data[host][0]["cm_refresh"]

    # Presumably a column label followed by the key path into a job record;
    # verify against the template.
    attributes = {
        "pbs": [
            ["Queue", "queue"],
            ["State", "job_state"],
            ["Name", "Job_Name"],
            ["Owner", "Job_Owner"],
            ["NCpus", "Resource_List", "ncpus"],
            ["Walltime", "Resource_List", "walltime"],
            ["Nodes", "Resource_List", "nodes"],
            ["Nodect", "Resource_List", "nodect"],
            ["mtime", "mtime"],
            ["qtime", "qtime"],
            ["Used Cpu Time", "resources_used", 'cput'],
            ["Used Mem ", "resources_used", 'mem'],
            ["Used VMem ", "resources_used", 'vmem'],
            ["Used Cpu Walltime", "resources_used", 'walltime'],
        ],
    }

    return render_template('mesh/hpc/mesh_qstat.html',
                           hosts=hosts,
                           jobcount=jobcount,
                           timer=timer,
                           address_string=address_string,
                           attributes=attributes,
                           updated=time_now,
                           qstat=data,
                           error=error,
                           config=config)