def pbs_service(rack_name):
    """Refresh the stored service type of the servers with the help of PBS.

    The per-rack service dict produced by ``RackWork.pbs_service`` is handed
    to ``RackData.server_refresh_update_service`` together with ``rack_name``.

    :param rack_name: rack identifier forwarded unchanged to both calls.
    """
    log.debug("pbs_service_task recieved")
    # Query PBS first, then create the data layer and store the result.
    service_by_rack = RackWork().pbs_service(rack_name)
    RackData().server_refresh_update_service(rack_name, service_by_rack)
def temperature(host_name, rack_name, unit):
    """Fetch the temperature of ``host_name`` through the ipmi API and store it.

    :param host_name: host whose temperature is read.
    :param rack_name: rack the host belongs to; used when storing the result.
    :param unit: accepted for interface compatibility; not used by this
        function.
    """
    log.debug("temperature_task recieved")
    # Read the sensor data first, then create the data layer and store it.
    readings = Temperature().get_ipmi_temperature(host_name)
    RackData().server_refresh_update_temperature(rack_name, host_name,
                                                 readings)
def read_data_from_mongodb(rack_name="all", unit='C'):
    """Read the temperature data of ``rack_name`` from mongo db.

    :param rack_name: rack to read; defaults to ``"all"``.
    :param unit: unit passed on to ``parse_max_temp``; defaults to ``'C'``.
    :returns: ``{rack: {host: value}}`` where ``value`` is the ``"value"``
        entry of the parsed reading, or ``None`` when the data layer returns
        ``None``.
    """
    store = RackData()
    parser = RackTemperature()
    raw_by_rack = store.get_rack_temperature_data(rack_name)
    if raw_by_rack is None:
        return None

    parsed_by_rack = {}
    for rack in raw_by_rack:
        raw_by_host = raw_by_rack[rack]
        parsed_by_host = parsed_by_rack[rack] = {}
        for host in raw_by_host:
            parsed = parser.parse_max_temp(raw_by_host[host], unit)
            parsed_by_host[host] = parsed["value"]
    return parsed_by_rack
def __init__(self, username=None, valid_time=1800):
    """Set up the data layer and the map worker for ``username``.

    :param username: user on whose behalf the work is done.
    :param valid_time: data valid time, stored through
        ``set_data_valid_time``; defaults to 1800.
    """
    self.username = username
    # Go through the setter so RackData receives the value it stored.
    self.set_data_valid_time(valid_time)
    validity = self.data_valid_time
    self.rackdata = RackData(validity)
    self.rackwork = RackWork(username)
class FetchClusterInfo:
    """Coordinate refreshing rack data and generating rack maps for a user.

    Rack data is read and updated through ``self.rackdata``; maps are
    generated by ``self.rackwork``; progress is reported through
    ``self.map_progress``.
    """

    # progress tracker of the service being processed, set by
    # get_map_progress()
    map_progress = None
    username = None
    # class-level fallback; __init__ replaces it (1800 seconds by default)
    data_valid_time = 0

    def __init__(self, username=None, valid_time=1800):
        """Create the data layer and map worker used by this instance.

        :param username: user on whose behalf the work is done.
        :param valid_time: data valid time in seconds; non-positive values
            are stored as 0.
        """
        self.username = username
        self.set_data_valid_time(valid_time)
        self.rackdata = RackData(self.data_valid_time)
        self.rackwork = RackWork(self.username)

    # get/set data valid time in mongo db
    def get_data_valid_time(self):
        """Return the stored data valid time."""
        return self.data_valid_time

    def set_data_valid_time(self, valid_time):
        """Store ``valid_time``; values that are not positive become 0."""
        self.data_valid_time = valid_time if valid_time > 0 else 0

    def start_async_refresh(self, type, rack_name, server):
        """Queue the asynchronous refresh task for one service type.

        :param type: ``rackdata.TEMPERATURE_NAME`` queues ``temperature`` for
            ``server``; ``rackdata.SERVICE_NAME`` queues ``pbs_service``.
            Any other value is only logged.
        :param rack_name: rack passed to the queued task.
        :param server: host passed to the temperature task; unused for the
            service task.
        """
        debug = self.rackdata.mydebug
        if type == self.rackdata.TEMPERATURE_NAME:
            debug("start_async_refresh, BEGIN delay start server {2} of {1}-{0}"
                  .format(type, rack_name, server))
            temperature.apply_async((server, rack_name, 'C'), queue='rack')
            debug("start_async_refresh, END delay start server {2} of {1}-{0}"
                  .format(type, rack_name, server))
        elif type == self.rackdata.SERVICE_NAME:
            debug("start_async_refresh, BEGIN delay start pbs_service of {1}-{0}"
                  .format(type, rack_name))
            pbs_service.apply_async([rack_name], queue='rack')
            debug("start_async_refresh, END delay start pbs_service of {1}-{0}"
                  .format(type, rack_name))
        else:
            log.debug("NOT Supported to refresh {0} status of {1}".format(
                type, rack_name))

    def refresh_rack_data(self, type, rack_name):
        """Start refreshing the rack status data of one service type.

        :param type: ``rackdata.TEMPERATURE_NAME`` or
            ``rackdata.SERVICE_NAME``.
        :param rack_name: rack to refresh.
        :returns: ``False`` when ``can_start_refresh`` returns ``None``,
            otherwise ``True``.
        """
        self.rackdata.mydebug("enter into refresh_rack_data of {0}-{1}".format(
            rack_name, type))
        # refresh_rack_temperature()/refresh_rack_service() reach this method
        # without selecting a progress tracker first; select it here instead
        # of failing on the class-level ``None``.
        if self.map_progress is None:
            self.get_map_progress(type)
        # check the status of 'rack_status'
        refresh_dict = self.rackdata.can_start_refresh(type, rack_name)
        self.map_progress.set_check_refresh_condition()
        if refresh_dict is None:
            return False

        total_hosts = {}
        if type == self.rackdata.TEMPERATURE_NAME:
            # NOTE: the loop deliberately rebinds ``rack_name``; the exit
            # debug message below reports the last rack iterated.
            for rack_name in refresh_dict:
                if refresh_dict[rack_name]:
                    total_hosts[rack_name] = {
                        "total": 0,
                        "updated": 0,
                        "ratio": 0,
                    }
                    # change the status of 'rack_status' to 'refresh'
                    self.rackdata.set_status_start_refresh(type, rack_name)
                    hosts = self.rackdata.inventory.hostlist(rack_name)
                    total_hosts[rack_name]["total"] = len(hosts)
                    self.map_progress.update_data(
                        "temperature_data",
                        {rack_name: total_hosts[rack_name]})
                    # fetch data for each host
                    for server in hosts:
                        self.start_async_refresh(type, rack_name, server)
        elif type == self.rackdata.SERVICE_NAME:
            # ``any`` replaces the Python-2-only builtin ``reduce``.
            if any(refresh_dict[name] for name in refresh_dict):
                for rack_name in refresh_dict:
                    total_hosts[rack_name] = {
                        "total": 0,
                        "updated": 0,
                        "ratio": 0,
                    }
                    # change the status of 'rack_status' to 'refresh'
                    self.rackdata.set_status_start_refresh(type, rack_name)
                    hosts = self.rackdata.inventory.hostlist(rack_name)
                    total_hosts[rack_name]["total"] = len(hosts)
                self.map_progress.update_data("service_data", total_hosts)
                # service refresh operation can update all rack ONLY once
                # NOTE(review): ``rack_name`` is the last key of refresh_dict
                # here, not the caller's argument - confirm this is intended.
                self.start_async_refresh(type, rack_name, None)

        self.rackdata.mydebug(
            "exit from refresh_rack_data of {0}-{1} with refresh data {2}".
            format(rack_name, type, refresh_dict))
        return True

    def refresh_rack_temperature(self, rack_name):
        """Refresh the temperature data of ``rack_name``."""
        return self.refresh_rack_data(self.rackdata.TEMPERATURE_NAME,
                                      rack_name)

    def refresh_rack_service(self, rack_name):
        """Refresh the service data of ``rack_name``."""
        return self.refresh_rack_data(self.rackdata.SERVICE_NAME, rack_name)

    def start_gen_map(self, service, rack_name):
        """API of generate map: run ``gen_map_thread`` in a new thread.

        :returns: always ``True``.
        """
        t = threading.Thread(target=self.gen_map_thread,
                             args=[service, rack_name])
        t.start()
        return True

    def start_refresh_map(self, service, rack_name):
        """API of refresh map.

        :returns: ``{"result": ..., "fresh": ...}``. ``result`` is True when
            the refresh thread was started. When it is False, ``fresh`` gives
            the reason: True means the data in db is fresh and does not need
            a refresh, False means an error occurred in db.
        """
        result = False
        flag_fresh = False
        refresh_dict = self.rackdata.can_start_refresh(service, rack_name)
        if refresh_dict:
            # ``any`` replaces the Python-2-only builtin ``reduce``.
            result = any(refresh_dict[name] for name in refresh_dict)
            if result:
                t = threading.Thread(target=self.refresh_map_thread,
                                     args=[service, rack_name])
                t.start()
            else:
                flag_fresh = True
        return {"result": result, "fresh": flag_fresh}

    def refresh_map_thread(self, service, rack_name):
        """Thread body: refresh the rack data, then regenerate the map."""
        self.get_map_progress(service)
        flag_data_ready = False
        if self.refresh_rack_data(service, rack_name):
            flag_data_ready = self.check_rack_refresh_status(
                service, rack_name)
        if flag_data_ready:
            self.rackwork.generate_map(service, rack_name, True)
        else:
            # refresh error, return to user
            self.map_progress.set_error_status()

    def gen_map_thread(self, service, rack_name):
        """Thread body: generate the map, refreshing the data if needed."""
        self.get_map_progress(service)
        flag_read_refresh_data = False
        flag_data_ready = False
        if self.rackdata.is_rack_data_ready(service, rack_name):
            flag_data_ready = True
        else:
            self.map_progress.set_load_refresh_map()
            if self.refresh_rack_data(service, rack_name):
                flag_read_refresh_data = True
                flag_data_ready = self.check_rack_refresh_status(
                    service, rack_name)
        if flag_data_ready:
            self.rackwork.generate_map(service, rack_name,
                                       flag_read_refresh_data)
        else:
            # read db error, return to user
            self.map_progress.set_error_status()

    def update_data_refresh_status(self, service):
        """Update the progress of the async data refresh of ``service``.

        Racks whose ``updated`` count is below ``total`` are re-queried in
        the db. The overall ratio, rounded to two decimals, is reported
        through ``map_progress.set_async_refresh``. Nothing is reported when
        no hosts are tracked; the original divided by zero in that case.
        """
        data_dict = self.map_progress.get_data(
            "{0}_data".format(service)) or {}
        total_count = 0
        total_updated = 0
        for rack_name in data_dict:
            rack_status = data_dict[rack_name]
            if rack_status["updated"] < rack_status["total"]:
                # query db to get recent update status
                query_dict = self.rackdata.get_rack_query_dict(
                    service, rack_name, self.rackdata.LOCATION_TEMP)
                result_dict = self.rackdata.partly_query(
                    query_dict[rack_name], {"updated_node": 1})
                rack_status["updated"] = result_dict["updated_node"]
            total_count += rack_status["total"]
            total_updated += rack_status["updated"]
        if total_count == 0:
            # no tracked hosts: there is no meaningful ratio to report
            return
        ratio = total_updated * 1.0 / total_count
        self.map_progress.set_async_refresh(round(ratio, 2))

    def check_rack_refresh_status(self, service, rack_name, interval=0.2,
                                  max_count=10000):
        """Poll the db until the refreshed rack data is ready.

        :param interval: seconds to sleep before each check (default 200 ms).
        :param max_count: maximum number of checks; at least one check is
            always made.
        :returns: ``True`` when the data became ready, ``False`` when the
            check count was exhausted.
        """
        result = False
        curr_count = 1
        while True:
            sleep(interval)
            # check and update async data refresh status
            self.update_data_refresh_status(service)
            if self.rackdata.is_refresh_rack_data_ready(service, rack_name):
                self.map_progress.set_async_refresh()
                result = True
                break
            curr_count += 1
            if curr_count > max_count:
                break
        return result

    def get_map_progress(self, service):
        """Select and return the progress tracker that matches ``service``.

        ``self.map_progress`` is left unchanged for an unknown service.
        """
        if service == self.rackdata.TEMPERATURE_NAME:
            self.map_progress = get_temperature_progress(self.username)
        elif service == self.rackdata.SERVICE_NAME:
            self.map_progress = get_service_progress(self.username)
        return self.map_progress
def __init__(self, username=None):
    """Create the temperature parser and data layer used by the worker.

    :param username: user on whose behalf maps are generated.
    """
    self.username = username
    self.temperature_ipmi = cm_temperature()
    # NOTE(review): FetchClusterInfo passes a data valid time to RackData,
    # while the username is passed here - confirm which argument RackData
    # expects.
    self.rackdata = RackData(username)
class RackWork:
    """Generate rack maps and read the data they are drawn from."""

    # progress tracker of the service being processed, set by
    # get_map_progress()
    map_progress = None
    username = None

    def __init__(self, username=None):
        """Create the temperature parser and data layer used by the worker.

        :param username: user on whose behalf maps are generated.
        """
        self.username = username
        self.temperature_ipmi = cm_temperature()
        # NOTE(review): FetchClusterInfo passes a data valid time to
        # RackData, while the username is passed here - confirm which
        # argument RackData expects.
        self.rackdata = RackData(self.username)

    def generate_map(self, service, rack_name, refresh_flag=False):
        """Generate the map image of ``service`` and publish its metadata.

        Runs inside the worker threads started by FetchClusterInfo.

        :param service: ``"temperature"`` or ``"service"``; selects the map
            class and the method used to read the data.
        :param rack_name: rack to draw.
        :param refresh_flag: forwarded to the data reader, which uses it to
            choose which progress stage to report.
        :raises KeyError: when ``service`` is not a supported service name.
        """
        # "class" is the map class of the service type, "method" is the
        # name of the method of this class that fetches its data
        service_options = {
            "temperature": {
                "class": HeatClusterMap,
                "method": "read_temperature_mongo",
            },
            "service": {
                "class": ServiceClusterMap,
                "method": "read_service_mongo",
            },
        }
        options = service_options[service]
        # update progress status
        self.get_map_progress(service)

        # get location of configuration file, input diag, output image
        dir_base = config_file("")
        server_config = cm_config_server()
        relative_dir_diag = server_config.get("cloudmesh.server.rack.input")
        relative_dir_image = server_config.get(
            "cloudmesh.server.rack.diagrams.{0}".format(service))
        flask_dir = "static"
        # guess absolute path of cloudmesh_web from the working directory
        cloudmesh_web_dir = pwd().strip().split("/")
        list_image_dir = [flask_dir] + relative_dir_image.strip().split("/")
        abs_dir_image = "/".join(cloudmesh_web_dir + list_image_dir)
        abs_dir_diag = dir_base + "/" + relative_dir_diag

        # dynamic generate image
        map_class = options["class"](
            self.username, rack_name, dir_base, abs_dir_diag, abs_dir_image)

        # get cluster server data
        # If user want to customize the action, user can set optional param
        # here by calling map_class.set_optional_param(value)
        # (map_class.genRandomValues() was reachable only through a
        # hard-coded ``if False`` debugging switch, which has been removed.)
        aparam = map_class.get_optional_param()
        dict_data = getattr(self, options["method"])(
            rack_name, aparam, refresh_flag)

        # update data, then plot map
        map_class.update(dict_data)
        map_class.plot()

        # get image names
        filename_image = map_class.getImageFilename()
        filename_legend = map_class.getLegendFilename()
        image_size = map_class.getImageSize()
        legend_size = map_class.getImageLegendSize()
        abs_web_path_image = "/".join([""] + list_image_dir +
                                      [filename_image])
        abs_web_path_legend = "/".join([""] + list_image_dir +
                                       [filename_legend])
        # the current time is appended as a query string to both urls
        img_flag = "?" + str(time.time())
        map_data = {
            "map_width": image_size["width"],
            "map_height": image_size["height"],
            "legend_width": legend_size["width"],
            "legend_height": legend_size["height"],
            "map_url": abs_web_path_image + img_flag,
            "legend_url": abs_web_path_legend + img_flag,
        }
        self.map_progress.update_data("map_data", map_data)

    def pbs_service(self, rack_name=None):
        """Collect the service type of every inventory host from PBS.

        Used by the celery task.

        :param rack_name: accepted for interface compatibility; every
            cluster returned by the inventory is processed regardless.
        :returns: ``{rack: {host_id: service_type}}``; hosts without a PBS
            note are reported as ``"unknown"``.
        """
        config = cm_config()
        username = config.get("cloudmesh.hpc.username")
        pbs = PBS(username, "india.futuregrid.org")
        dict_pbs_info = pbs.pbsnodes()

        dict_data = {}
        inventory = Inventory()
        for rack in inventory.get_clusters():
            cluster_data = dict_data[rack["cm_cluster"]] = {}
            for host in rack["cm_value"]:
                (hid, hlabel) = inventory.get_host_id_label(host, "public")
                utype = "unknown"
                if hlabel in dict_pbs_info:
                    server = dict_pbs_info[hlabel]
                    if "note" in server:
                        note_value = server["note"]
                        # to compatible with the future change
                        if isinstance(note_value, dict):
                            utype = note_value["service"]
                        else:
                            # currently is a literal string for note
                            utype = note_value
                cluster_data[hid] = utype
        return dict_data

    def _report_read_progress(self, refresh_flag, *ratio):
        """Report a read stage on the tracker selected by ``refresh_flag``."""
        if refresh_flag:
            self.map_progress.set_read_refresh_data(*ratio)
        else:
            self.map_progress.set_read_data_from_db(*ratio)

    def read_temperature_mongo(self, rack_name=None, unit='C',
                               refresh_flag=False):
        """Fetch cluster temperature from mongo db.

        :param rack_name: rack to read, passed to the data layer.
        :param unit: unit passed to ``parse_max_temp``.
        :param refresh_flag: selects which progress stage is reported.
        :returns: ``{host: value}`` over all racks returned. Empty when the
            data layer returns ``None``, the case that
            ``read_data_from_mongodb`` also guards against.
        """
        rack_data_dict = self.rackdata.get_rack_temperature_data(
            rack_name) or {}
        self._report_read_progress(refresh_flag, 0.7)

        dict_data = {}
        for rack in rack_data_dict:
            rack_hosts = rack_data_dict[rack]
            for host in rack_hosts:
                result = self.temperature_ipmi.parse_max_temp(
                    rack_hosts[host], unit)
                dict_data[host] = result["value"]

        self._report_read_progress(refresh_flag)
        return dict_data

    def read_service_mongo(self, rack_name=None, unit=None,
                           refresh_flag=False):
        """Fetch cluster service from mongo db.

        :param rack_name: rack to read, passed to the data layer.
        :param unit: unused; kept so both readers share one signature.
        :param refresh_flag: selects which progress stage is reported.
        :returns: ``{host: service}`` over all racks returned. Empty when the
            data layer returns ``None``.
        """
        rack_data_dict = self.rackdata.get_rack_service_data(rack_name) or {}
        self._report_read_progress(refresh_flag, 0.7)

        dict_data = {}
        for rack in rack_data_dict:
            rack_hosts = rack_data_dict[rack]
            for host in rack_hosts:
                dict_data[host] = rack_hosts[host]

        self._report_read_progress(refresh_flag)
        return dict_data

    def get_map_progress(self, service):
        """Select and return the progress tracker that matches ``service``.

        ``self.map_progress`` is left unchanged for an unknown service.
        """
        if service == self.rackdata.TEMPERATURE_NAME:
            self.map_progress = get_temperature_progress(self.username)
        elif service == self.rackdata.SERVICE_NAME:
            self.map_progress = get_service_progress(self.username)
        return self.map_progress
class FetchClusterInfo:
    """Coordinate refreshing rack data and generating rack maps for a user.

    Rack data is read and updated through ``self.rackdata``; maps are
    generated by ``self.rackwork``; progress is reported through
    ``self.map_progress``.
    """

    # progress tracker of the service being processed, set by
    # get_map_progress()
    map_progress = None
    username = None
    # class-level fallback; __init__ replaces it (1800 seconds by default)
    data_valid_time = 0

    def __init__(self, username=None, valid_time=1800):
        """Create the data layer and map worker used by this instance.

        :param username: user on whose behalf the work is done.
        :param valid_time: data valid time in seconds; non-positive values
            are stored as 0.
        """
        self.username = username
        self.set_data_valid_time(valid_time)
        self.rackdata = RackData(self.data_valid_time)
        self.rackwork = RackWork(self.username)

    # get/set data valid time in mongo db
    def get_data_valid_time(self):
        """Return the stored data valid time."""
        return self.data_valid_time

    def set_data_valid_time(self, valid_time):
        """Store ``valid_time``; values that are not positive become 0."""
        self.data_valid_time = valid_time if valid_time > 0 else 0

    def start_async_refresh(self, type, rack_name, server):
        """Queue the asynchronous refresh task for one service type.

        :param type: ``rackdata.TEMPERATURE_NAME`` queues ``temperature`` for
            ``server``; ``rackdata.SERVICE_NAME`` queues ``pbs_service``.
            Any other value is only logged.
        :param rack_name: rack passed to the queued task.
        :param server: host passed to the temperature task; unused for the
            service task.
        """
        debug = self.rackdata.mydebug
        if type == self.rackdata.TEMPERATURE_NAME:
            debug("start_async_refresh, BEGIN delay start server {2} of {1}-{0}"
                  .format(type, rack_name, server))
            temperature.apply_async((server, rack_name, 'C'), queue='rack')
            debug("start_async_refresh, END delay start server {2} of {1}-{0}"
                  .format(type, rack_name, server))
        elif type == self.rackdata.SERVICE_NAME:
            debug("start_async_refresh, BEGIN delay start pbs_service of {1}-{0}"
                  .format(type, rack_name))
            pbs_service.apply_async([rack_name], queue='rack')
            debug("start_async_refresh, END delay start pbs_service of {1}-{0}"
                  .format(type, rack_name))
        else:
            log.debug("NOT Supported to refresh {0} status of {1}".format(
                type, rack_name))

    def refresh_rack_data(self, type, rack_name):
        """Start refreshing the rack status data of one service type.

        :param type: ``rackdata.TEMPERATURE_NAME`` or
            ``rackdata.SERVICE_NAME``.
        :param rack_name: rack to refresh.
        :returns: ``False`` when ``can_start_refresh`` returns ``None``,
            otherwise ``True``.
        """
        self.rackdata.mydebug("enter into refresh_rack_data of {0}-{1}".format(
            rack_name, type))
        # refresh_rack_temperature()/refresh_rack_service() reach this method
        # without selecting a progress tracker first; select it here instead
        # of failing on the class-level ``None``.
        if self.map_progress is None:
            self.get_map_progress(type)
        # check the status of 'rack_status'
        refresh_dict = self.rackdata.can_start_refresh(type, rack_name)
        self.map_progress.set_check_refresh_condition()
        if refresh_dict is None:
            return False

        total_hosts = {}
        if type == self.rackdata.TEMPERATURE_NAME:
            # NOTE: the loop deliberately rebinds ``rack_name``; the exit
            # debug message below reports the last rack iterated.
            for rack_name in refresh_dict:
                if refresh_dict[rack_name]:
                    total_hosts[rack_name] = {
                        "total": 0,
                        "updated": 0,
                        "ratio": 0,
                    }
                    # change the status of 'rack_status' to 'refresh'
                    self.rackdata.set_status_start_refresh(type, rack_name)
                    hosts = self.rackdata.inventory.hostlist(rack_name)
                    total_hosts[rack_name]["total"] = len(hosts)
                    self.map_progress.update_data(
                        "temperature_data",
                        {rack_name: total_hosts[rack_name]})
                    # fetch data for each host
                    for server in hosts:
                        self.start_async_refresh(type, rack_name, server)
        elif type == self.rackdata.SERVICE_NAME:
            # ``any`` replaces the Python-2-only builtin ``reduce``.
            if any(refresh_dict[name] for name in refresh_dict):
                for rack_name in refresh_dict:
                    total_hosts[rack_name] = {
                        "total": 0,
                        "updated": 0,
                        "ratio": 0,
                    }
                    # change the status of 'rack_status' to 'refresh'
                    self.rackdata.set_status_start_refresh(type, rack_name)
                    hosts = self.rackdata.inventory.hostlist(rack_name)
                    total_hosts[rack_name]["total"] = len(hosts)
                self.map_progress.update_data("service_data", total_hosts)
                # service refresh operation can update all rack ONLY once
                # NOTE(review): ``rack_name`` is the last key of refresh_dict
                # here, not the caller's argument - confirm this is intended.
                self.start_async_refresh(type, rack_name, None)

        self.rackdata.mydebug(
            "exit from refresh_rack_data of {0}-{1} with refresh data {2}".
            format(rack_name, type, refresh_dict))
        return True

    def refresh_rack_temperature(self, rack_name):
        """Refresh the temperature data of ``rack_name``."""
        return self.refresh_rack_data(self.rackdata.TEMPERATURE_NAME,
                                      rack_name)

    def refresh_rack_service(self, rack_name):
        """Refresh the service data of ``rack_name``."""
        return self.refresh_rack_data(self.rackdata.SERVICE_NAME, rack_name)

    def start_gen_map(self, service, rack_name):
        """API of generate map: run ``gen_map_thread`` in a new thread.

        :returns: always ``True``.
        """
        t = threading.Thread(target=self.gen_map_thread,
                             args=[service, rack_name])
        t.start()
        return True

    def start_refresh_map(self, service, rack_name):
        """API of refresh map.

        :returns: ``{"result": ..., "fresh": ...}``. ``result`` is True when
            the refresh thread was started. When it is False, ``fresh`` gives
            the reason: True means the data in db is fresh and does not need
            a refresh, False means an error occurred in db.
        """
        result = False
        flag_fresh = False
        refresh_dict = self.rackdata.can_start_refresh(service, rack_name)
        if refresh_dict:
            # ``any`` replaces the Python-2-only builtin ``reduce``.
            result = any(refresh_dict[name] for name in refresh_dict)
            if result:
                t = threading.Thread(target=self.refresh_map_thread,
                                     args=[service, rack_name])
                t.start()
            else:
                flag_fresh = True
        return {"result": result, "fresh": flag_fresh}

    def refresh_map_thread(self, service, rack_name):
        """Thread body: refresh the rack data, then regenerate the map."""
        self.get_map_progress(service)
        flag_data_ready = False
        if self.refresh_rack_data(service, rack_name):
            flag_data_ready = self.check_rack_refresh_status(
                service, rack_name)
        if flag_data_ready:
            self.rackwork.generate_map(service, rack_name, True)
        else:
            # refresh error, return to user
            self.map_progress.set_error_status()

    def gen_map_thread(self, service, rack_name):
        """Thread body: generate the map, refreshing the data if needed."""
        self.get_map_progress(service)
        flag_read_refresh_data = False
        flag_data_ready = False
        if self.rackdata.is_rack_data_ready(service, rack_name):
            flag_data_ready = True
        else:
            self.map_progress.set_load_refresh_map()
            if self.refresh_rack_data(service, rack_name):
                flag_read_refresh_data = True
                flag_data_ready = self.check_rack_refresh_status(
                    service, rack_name)
        if flag_data_ready:
            self.rackwork.generate_map(service, rack_name,
                                       flag_read_refresh_data)
        else:
            # read db error, return to user
            self.map_progress.set_error_status()

    def update_data_refresh_status(self, service):
        """Update the progress of the async data refresh of ``service``.

        Racks whose ``updated`` count is below ``total`` are re-queried in
        the db. The overall ratio, rounded to two decimals, is reported
        through ``map_progress.set_async_refresh``. Nothing is reported when
        no hosts are tracked; the original divided by zero in that case.
        """
        data_dict = self.map_progress.get_data(
            "{0}_data".format(service)) or {}
        total_count = 0
        total_updated = 0
        for rack_name in data_dict:
            rack_status = data_dict[rack_name]
            if rack_status["updated"] < rack_status["total"]:
                # query db to get recent update status
                query_dict = self.rackdata.get_rack_query_dict(
                    service, rack_name, self.rackdata.LOCATION_TEMP)
                result_dict = self.rackdata.partly_query(
                    query_dict[rack_name], {"updated_node": 1})
                rack_status["updated"] = result_dict["updated_node"]
            total_count += rack_status["total"]
            total_updated += rack_status["updated"]
        if total_count == 0:
            # no tracked hosts: there is no meaningful ratio to report
            return
        ratio = total_updated * 1.0 / total_count
        self.map_progress.set_async_refresh(round(ratio, 2))

    def check_rack_refresh_status(self, service, rack_name, interval=0.2,
                                  max_count=10000):
        """Poll the db until the refreshed rack data is ready.

        :param interval: seconds to sleep before each check (default 200 ms).
        :param max_count: maximum number of checks; at least one check is
            always made.
        :returns: ``True`` when the data became ready, ``False`` when the
            check count was exhausted.
        """
        result = False
        curr_count = 1
        while True:
            sleep(interval)
            # check and update async data refresh status
            self.update_data_refresh_status(service)
            if self.rackdata.is_refresh_rack_data_ready(service, rack_name):
                self.map_progress.set_async_refresh()
                result = True
                break
            curr_count += 1
            if curr_count > max_count:
                break
        return result

    def get_map_progress(self, service):
        """Select and return the progress tracker that matches ``service``.

        ``self.map_progress`` is left unchanged for an unknown service.
        """
        if service == self.rackdata.TEMPERATURE_NAME:
            self.map_progress = get_temperature_progress(self.username)
        elif service == self.rackdata.SERVICE_NAME:
            self.map_progress = get_service_progress(self.username)
        return self.map_progress