def _set_vms_ip_mac(self):
    """Assign an IP and a MAC address to every VM from the reservation's
    IP/MAC pool (not finished)."""
    if isinstance(self.ip_mac, dict):
        # one pool of (ip, mac) tuples per site
        i_vm = {site: 0 for site in self.sites}
        for vm in self.vms:
            vm_site = get_host_site(vm['host'])
            vm['ip'], vm['mac'] = self.ip_mac[vm_site][i_vm[vm_site]]
            i_vm[vm_site] += 1
    else:
        # single flat pool of (ip, mac) tuples
        i_vm = 0
        for vm in self.vms:
            vm['ip'], vm['mac'] = self.ip_mac[i_vm]
            i_vm += 1
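# Illustrative sketch (an assumption drawn from the code above, not part of
# the original module) of the two self.ip_mac layouts that _set_vms_ip_mac
# handles: a per-site dict of (ip, mac) pools when the reservation spans
# several sites, or a single flat pool otherwise.
#
#   self.ip_mac = {'rennes': [('10.158.0.1', '00:16:3e:00:00:01'), ...],
#                  'lyon':   [('10.160.0.1', '00:16:3e:10:00:01'), ...]}
#
#   self.ip_mac = [('10.158.0.1', '00:16:3e:00:00:01'),
#                  ('10.158.0.2', '00:16:3e:00:00:02')]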
def get_hosts_jobs(hosts, walltime, out_of_chart=False):
    """Find the first slot when the hosts are available and return a list
    of jobs_specs

    :param hosts: list of hosts

    :param walltime: duration of reservation
    """
    hosts = map(lambda x: x.address if isinstance(x, Host) else x, hosts)
    planning = get_planning(elements=hosts, out_of_chart=out_of_chart)
    limits = _slots_limits(planning)
    walltime = get_seconds(walltime)
    for limit in limits:
        all_host_free = True
        for site_planning in planning.itervalues():
            for cluster, cluster_planning in site_planning.iteritems():
                if cluster in get_g5k_clusters():
                    for host_planning in cluster_planning.itervalues():
                        host_free = False
                        for free_slot in host_planning['free']:
                            if free_slot[0] <= limit and \
                                    free_slot[1] >= limit + walltime:
                                host_free = True
                        if not host_free:
                            all_host_free = False
        if all_host_free:
            startdate = limit
            break
    else:
        logger.error('Unable to find a slot for %s', hosts)
        return None

    jobs_specs = []
    for site in planning.keys():
        site_hosts = map(get_host_longname,
                         filter(lambda h: get_host_site(h) == site, hosts))
        sub_res = "{host in ('" + "','".join(site_hosts) + "')}/nodes=" + \
            str(len(site_hosts))
        jobs_specs.append((OarSubmission(resources=sub_res,
                                         reservation_date=startdate), site))
    return jobs_specs
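# Rough usage sketch (illustrative only, not part of the original module):
# reserve two known hosts for one hour and retrieve the nodes once the jobs
# start; assumes oarsub and get_oar_job_nodes from execo_g5k are available in
# the caller's namespace, and the host names are hypothetical.
#
#   jobs_specs = get_hosts_jobs(['paravance-1.rennes.grid5000.fr',
#                                'paravance-2.rennes.grid5000.fr'], '1:00:00')
#   if jobs_specs:
#       jobs = oarsub(jobs_specs)              # -> [(job_id, 'rennes'), ...]
#       for job_id, site in jobs:
#           nodes = get_oar_job_nodes(job_id, site)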
def benchmark_metrics(self):
    nb_diet_success = 0
    nb_diet_error = 0
    nb_diet_nofound = 0
    bench_size = "regular"
    log_repository = "/root/dietg/log/"
    servers = [host for host in self.servers]

    logger.info("Clear bench files")
    cmd = "rm " + log_repository + "/flops_watts.bench 2> /dev/null; " + \
          "rm " + log_repository + "/conso.bench 2> /dev/null; " + \
          "rm " + log_repository + "/flops.bench 2> /dev/null; "
    a = Remote(cmd, servers, connection_params=root_connection_params).run()
    # for s in a.processes:
    #     pout = s.stdout
    #     logger.debug(pout)

    # Initialise the clients
    clients = [self.clients]
    logger.info("Initialize client on node %s", clients)
    cmd = "cd " + sched_dir + "; make clean && make"
    a = Remote(cmd, clients, connection_params=root_connection_params).run()
    for s in a.processes:
        pout = s.stdout
        logger.info(pout)
    cmd = "cd /root/dietg/; ./set_client.sh"
    a = Remote(cmd, clients, connection_params=root_connection_params).run()
    for s in a.processes:
        pout = s.stdout
        logger.info(pout)

    logger.info("Benchmark_metrics")
    logger.info("Another bench just started!")
    start = time.time()

    # Launch one client run per server and count the DIET call results
    array_process = set()
    for x in range(len(self.servers)):
        cmd = "cd " + sched_dir + "; ./client_" + bench_size
        a = Remote(cmd, clients,
                   connection_params=root_connection_params).start()
        array_process.add(a)
    for process in array_process:
        process.wait()
        for s in process.processes:
            pout = s.stdout
            logger.debug(pout)
            if "no server found" in pout:
                nb_diet_nofound += 1
            elif "diet call error" in pout:
                nb_diet_error += 1
            elif "diet call success" in pout:
                nb_diet_success += 1
    logger.info("All the benches have terminated "
                "(success = %s) | (error = %s) | (no server found = %s)",
                str(nb_diet_success), str(nb_diet_error),
                str(nb_diet_nofound))
    end = time.time()
    makespan = end - start
    if makespan < 5:
        logger.info("Benchmark has failed to execute! Another try will occur!")
        return False
    logger.info("Total makespan = %d", makespan)

    resolution = 15

    # Electric consumption benchmark metrics (conso.bench)
    logger.info("Retrieve consumption_bench per SeD")
    bench_file = "conso.bench"
    for sed in self.servers:
        self.consumption_bench[sed] = get_g5k_api_measures(
            sed, get_host_site(sed), "pdu", start, end, resolution)
        logger.debug("Electric Consumption of %s = %sW (%sJ)", sed,
                     self.consumption_bench[sed],
                     self.consumption_bench[sed] * makespan)
        with open(bench_file, "w") as file1:
            file1.write(str(self.consumption_bench[sed] * makespan))
            file1.write("\n")
        os.system("scp " + bench_file + " root@" + sed + ":"
                  + log_repository)  # .g5k

    # Performance benchmark metrics (flops.bench)
    logger.info("Retrieve FLOPS per SeD")
    bench_file = "flops.bench"
    for sed in self.servers:
        self.flops_bench[sed] = \
            get_host_attributes(sed)['performance']['node_flops']
        logger.debug("Flops Number of %s = %s", sed, self.flops_bench[sed])
        with open(bench_file, "w") as file1:
            file1.write(str(self.flops_bench[sed]))
            file1.write("\n")
        os.system("scp " + bench_file + " root@" + sed + ":"
                  + log_repository + " > /dev/null")

    # Flops per watt (flops_watts.bench)
    logger.info("Retrieve FLOPS/Watts per SeD")
    bench_file = "flops_watts.bench"
    for sed in self.servers:
        self.flops_watts_bench[sed] = \
            self.flops_bench[sed] / self.consumption_bench[sed]
        logger.debug("flops_watt of %s = %s", sed, self.flops_watts_bench[sed])
        with open(bench_file, "w") as file1:
            file1.write(str(self.flops_watts_bench[sed]))
            file1.write("\n")
        os.system("scp " + bench_file + " root@" + sed + ":"
                  + log_repository + " > /dev/null")

    logger.info("Benchmark_metrics done")
    return True
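# Minimal sketch of the asynchronous execo pattern used above (illustrative
# only; the command and host name are placeholders): start several Remote
# actions, wait for each one and inspect the per-process stdout.
#
#   actions = set()
#   for _ in range(3):
#       actions.add(Remote('hostname', ['node-1.site.grid5000.fr'],
#                          connection_params=root_connection_params).start())
#   for action in actions:
#       action.wait()
#       for p in action.processes:
#           logger.debug(p.stdout)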
def retrieve_results(self, start, end):
    self.makespan = end - start
    resolution = 15
    self.consumption["total"] = 0

    now = strftime("%d_%b_%H:%M", gmtime())
    folder_name = "results_" + str(self.oargrid_job_id) + "_" + self.scheduler
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)
    filename = "./" + folder_name + "/%s_%s.log" % (self.scheduler,
                                                    self.oargrid_job_id)
    # Increment a counter until the file name is not already taken
    already_exists = True
    counter = 0
    while already_exists:
        try:
            with open(filename):
                already_exists = True
                counter += 1
                filename = "./" + folder_name + "/%s_%s_%s.log" % \
                    (self.scheduler, self.oargrid_job_id, str(counter))
        except IOError:
            already_exists = False
    file_results = filename

    with open(file_results, "a") as fichier_resultats:
        fichier_resultats.write("Use Rate = %s" % (str(self.useRate)))
        fichier_resultats.write("\n")
        fichier_resultats.write("Exp Time = %s" % (str(self.exp_time)))
        fichier_resultats.write("\n\n")

        logger.info("Retrieve consumption per SeD")
        for sed in self.servers:
            self.consumption[sed] = get_g5k_api_measures(
                sed, get_host_site(sed), "pdu", start, end, resolution)
            self.consumption["total"] += float(self.consumption[sed])
            logger.info("Electric Consumption of %s = %s", sed,
                        self.consumption[sed])
            fichier_resultats.write("Electric Consumption of %s = %sW (%sJ)"
                                    % (sed, self.consumption[sed],
                                       self.consumption[sed] * self.makespan))
            fichier_resultats.write("\n")

        logger.info("Retrieve consumption per MA")
        for MA in self.MA:
            self.consumption[MA] = get_g5k_api_measures(
                MA, get_host_site(MA), "pdu", start, end, resolution)
            self.consumption["total"] += float(self.consumption[MA])
            logger.info("Electric Consumption of %s = %s", MA,
                        self.consumption[MA])
            fichier_resultats.write("Electric Consumption of %s = %sW (%sJ)"
                                    % (MA, self.consumption[MA],
                                       self.consumption[MA] * self.makespan))
            fichier_resultats.write("\n")

        logger.info("Retrieve total consumption")
        logger.info("Electric Consumption of the architecture")
        print self.consumption["total"]
        fichier_resultats.write("Total consumption = %sW (%sJ)"
                                % (self.consumption["total"],
                                   self.consumption["total"] * self.makespan))
        fichier_resultats.write("\n")

        logger.info("Retrieve number of tasks per SeD")
        self.get_nb_tasks_server()
        for host, task in self.nb_tasks.iteritems():
            logger.debug("%s : %d tasks", host, task)
            fichier_resultats.write("%s : %d tasks" % (host, task))
            fichier_resultats.write("\n")

        logger.info("Retrieve SeD log files")
        nb_files = self.get_logs_from_server()
        logger.info("%s / %s files were retrieved", str(nb_files),
                    str(len(self.servers)))

        logger.info("Retrieve total makespan")
        logger.info("Total makespan = %d", self.makespan)
        fichier_resultats.write("Total makespan = %s" % (self.makespan))
        fichier_resultats.write("\n")
def get_planning(elements=['grid5000'], vlan=False, subnet=False,
                 storage=False, out_of_chart=False, starttime=None,
                 endtime=None, ignore_besteffort=True, queues='default'):
    """Retrieve the planning of the elements (site, cluster) and other
    resources. Element planning structure is
    ``{'busy': [(123456, 123457), ...], 'free': [(123457, 123460), ...]}``.

    :param elements: a list of Grid'5000 elements ('grid5000', <site>, <cluster>)

    :param vlan: a boolean to ask for KaVLAN computation

    :param subnet: a boolean to ask for subnets computation

    :param storage: a boolean to ask for storage computation

    :param out_of_chart: if True, consider that days outside weekends are busy

    :param starttime: start of the time period for which to compute the
      planning, defaults to now + 1 minute

    :param endtime: end of the time period for which to compute the planning,
      defaults to 4 weeks from now

    :param ignore_besteffort: True by default, to consider the resources with
      besteffort jobs as available

    :param queues: list of OAR queues for which to get the planning

    Return a dict whose keys are sites, whose values are dicts whose keys are
    cluster, subnets, kavlan or storage, whose values are planning dicts,
    whose keys are hosts, subnet address range, vlan number or chunk id
    planning respectively.
    """
    if not starttime:
        starttime = int(time() + timedelta_to_seconds(timedelta(minutes=1)))
    starttime = int(get_unixts(starttime))
    if not endtime:
        endtime = int(starttime +
                      timedelta_to_seconds(timedelta(weeks=4, minutes=1)))
    endtime = int(get_unixts(endtime))

    if 'grid5000' in elements:
        sites = elements = get_g5k_sites()
    else:
        sites = list(set([site for site in elements
                          if site in get_g5k_sites()] +
                         [get_cluster_site(cluster) for cluster in elements
                          if cluster in get_g5k_clusters(queues=queues)] +
                         [get_host_site(host) for host in elements
                          if host in get_g5k_hosts()
                          or get_host_shortname(host) in get_g5k_hosts()]))
    if len(sites) == 0:
        logger.error('Wrong elements given: %s' % (elements,))
        return None

    planning = {}
    for site in sites:
        planning[site] = {}
        for cluster in get_site_clusters(site, queues=queues):
            planning[site][cluster] = {}

    for site in sites:
        if vlan:
            planning[site].update({'vlans': {}})
        if subnet:
            planning[site].update({'subnets': {}})
        if storage:
            planning[site].update({'storage': {}})

    if _retrieve_method == 'API':
        _get_planning_API(planning, ignore_besteffort)
    elif _retrieve_method == 'PostgreSQL':
        _get_planning_PGSQL(planning, ignore_besteffort)

    if out_of_chart:
        _add_charter_to_planning(planning, starttime, endtime)

    for site_pl in planning.values():
        for res_pl in site_pl.values():
            for el_planning in res_pl.values():
                el_planning['busy'].sort()
                _merge_el_planning(el_planning['busy'])
                _trunc_el_planning(el_planning['busy'], starttime, endtime)
                _fill_el_planning_free(el_planning, starttime, endtime)

    # cleaning
    real_planning = deepcopy(planning)
    for site, site_pl in planning.items():
        for cl, cl_pl in site_pl.items():
            if cl in ['vlans']:
                continue
            keep_cluster = False
            for h in cl_pl:
                if not (get_host_site(h) in elements
                        or get_host_cluster(h) in elements
                        or get_host_shortname(h) in elements
                        or h in elements):
                    del real_planning[site][cl][h]
                else:
                    keep_cluster = True
            if not keep_cluster:
                del real_planning[site][cl]

    return real_planning
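# Rough usage sketch (illustrative only, not part of the original module):
# compute the planning of a single cluster over the next 24 hours and list
# the free slots of every host; the cluster name is hypothetical and
# format_date is assumed to come from execo.time_utils.
#
#   planning = get_planning(elements=['paravance'],
#                           endtime=int(time() + timedelta_to_seconds(
#                               timedelta(days=1))))
#   for site_pl in planning.itervalues():
#       for cl_pl in site_pl.itervalues():
#           for host, host_pl in cl_pl.iteritems():
#               for free_start, free_stop in host_pl['free']:
#                   print host, format_date(free_start), format_date(free_stop)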
def get_jobs_specs(resources, excluded_elements=None, name=None):
    """Generate the several job specifications from the dict of resources and
    the blacklisted elements

    :param resources: a dict whose keys are Grid'5000 elements and whose
      values are the corresponding number of nodes

    :param excluded_elements: a list of elements that won't be used

    :param name: the name given to the jobs
    """
    jobs_specs = []
    if excluded_elements is None:
        excluded_elements = []

    # Creating the list of sites used
    sites = []
    real_resources = resources.copy()
    for resource in resources:
        if resource in get_g5k_sites() and resource not in sites:
            sites.append(resource)
        if resource in get_g5k_clusters(queues=None):
            if resource not in excluded_elements:
                site = get_cluster_site(resource)
                if site not in sites:
                    sites.append(site)
                if site not in real_resources:
                    real_resources[site] = 0

    # Checking whether we need a KaVLAN, a global KaVLAN or none
    get_kavlan = 'kavlan' in resources
    if get_kavlan:
        kavlan = 'kavlan'
        n_sites = 0
        for resource in real_resources:
            if resource in sites:
                n_sites += 1
            if n_sites > 1:
                kavlan += '-global'
                break

    blacklisted_hosts = {}
    for element in excluded_elements:
        if element not in get_g5k_clusters(queues=None) + get_g5k_sites():
            site = get_host_site(element)
            if site not in blacklisted_hosts:
                blacklisted_hosts[site] = [element]
            else:
                blacklisted_hosts[site].append(element)

    for site in sites:
        sub_resources = ''
        # Adding a KaVLAN if needed
        if get_kavlan:
            if 'global' not in kavlan:
                sub_resources = "{type='" + kavlan + "'}/vlan=1+"
                get_kavlan = False
            elif site in resources['kavlan']:
                sub_resources = "{type='" + kavlan + "'}/vlan=1+"
                get_kavlan = False

        base_sql = '{'
        end_sql = '}/'

        # Creating the blacklist SQL string for hosts
        host_blacklist = False
        str_hosts = ''
        if site in blacklisted_hosts and len(blacklisted_hosts[site]) > 0:
            str_hosts = ''.join(["host not in ('"
                                 + get_host_longname(host) + "') and "
                                 for host in blacklisted_hosts[site]])
            host_blacklist = True

        # Adding the clusters blacklist
        str_clusters = str_hosts if host_blacklist else ''
        cl_blacklist = False
        clusters_nodes = 0
        for cluster in get_site_clusters(site, queues=None):
            if cluster in resources and resources[cluster] > 0:
                if str_hosts == '':
                    sub_resources += "{cluster='" + cluster + "'}"
                else:
                    sub_resources += base_sql + str_hosts + "cluster='" + \
                        cluster + "'" + end_sql
                sub_resources += "/nodes=" + str(resources[cluster]) + '+'
                clusters_nodes += resources[cluster]
            if cluster in excluded_elements:
                str_clusters += "cluster not in ('" + cluster + "') and "
                cl_blacklist = True

        # Generating the site blacklist string from host and cluster blacklists
        str_site = ''
        if host_blacklist or cl_blacklist:
            str_site += base_sql
            if not cl_blacklist:
                str_site += str_hosts[:-4]
            else:
                str_site += str_clusters[:-4]
            str_site = str_site + end_sql

        if real_resources[site] > 0:
            sub_resources += str_site + "nodes=" + \
                str(real_resources[site]) + '+'

        if sub_resources != '':
            jobs_specs.append((OarSubmission(resources=sub_resources[:-1],
                                             name=name), site))

    return jobs_specs
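# Rough usage sketch (illustrative only, not part of the original module):
# ask for 4 nodes on one cluster plus 2 more nodes anywhere on its site,
# excluding one host, then submit the specs with oarsub (assumed imported
# from execo_g5k); the cluster, site and host names are hypothetical.
#
#   resources = {'paravance': 4, 'rennes': 2}
#   jobs_specs = get_jobs_specs(resources,
#                               excluded_elements=['paravance-5'],
#                               name='my_experiment')
#   jobs = oarsub(jobs_specs)                  # -> [(job_id, 'rennes')]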