def wait_hosts_down(hosts, timeout=300):
    """Poll port 22 with nmap until every host is reported down.

    :param hosts: iterable of Host objects and/or address strings
    :param timeout: maximum value of ``Timer.elapsed()`` before giving up
    :return: True if all hosts were seen down before the timeout
    """
    timer = Timer()
    # A real list is needed: it is measured with len() and mutated with
    # remove(), which a lazy map object does not support.
    up_hosts = [x.address if isinstance(x, Host) else x for x in hosts]
    fd, hosts_file = mkstemp(dir='/tmp/', prefix='hosts_')
    with fdopen(fd, 'w') as f:
        f.write('\n' + '\n'.join(up_hosts))
    try:
        while len(up_hosts) > 0 and timer.elapsed() < timeout:
            nmap = Process("nmap -v -oG - -i %s -p 22 |grep Host|grep Status"
                           % (hosts_file, ), shell=True).run()
            logger.debug('timer: %s \nnmap output: \n%s', timer.elapsed(),
                         nmap.stdout.strip())
            for line in nmap.stdout.strip().split('\n'):
                if 'Down' in line:
                    ip = line.split()[1]
                    # Reverse-resolve the IP; the trailing character of the
                    # resolved name is dropped before comparison.
                    get_host = Process('host ' + ip + '| cut -f 5 -d " "',
                                       shell=True).run()
                    host = get_host.stdout.strip()[0:-1]
                    if host in up_hosts:
                        logger.detail(host + ' is down')
                        up_hosts.remove(host)
    finally:
        # Always clean the temporary host list, even if a process call fails.
        Process('rm ' + hosts_file).run()
    return len(up_hosts) == 0
def get_g5k_api_measures(node_name, site_name, metric, start_timestamp,
                         end_timestamp, resolution):
    """Return the mean of the float samples of a metric timeseries.

    :param node_name: node whose timeseries is requested
    :param site_name: site hosting the node
    :param metric: metric name used in the request path
    :param start_timestamp: start of the period (epoch seconds)
    :param end_timestamp: end of the period (epoch seconds)
    :param resolution: value of the ``resolution`` query parameter
    :return: arithmetic mean of the float values, or None when the
        response contains no float value
    """
    start_date = datetime.fromtimestamp(
        float(start_timestamp)).strftime('%Y-%m-%d %H:%M:%S')
    end_date = datetime.fromtimestamp(
        float(end_timestamp)).strftime('%Y-%m-%d %H:%M:%S')
    logger.debug("Get %s from %r to %r on %s",
                 metric, start_date, end_date, node_name)
    request = "sites/" + site_name + "/metrics/" + metric + "/timeseries/" \
        + node_name + "?resolution=" + str(resolution) \
        + "&from=" + str(start_timestamp) + "&to=" + str(end_timestamp)
    # Equivalent manual query:
    # curl -k "https://api.grid5000.fr/2.1/grid5000/sites/lyon/metrics/pdu/timeseries/taurus-3?resolution=15&from=1370205024&to=1370205400"
    logger.debug("API Request = %s", request)
    attr = APIConnection()
    response = attr.get(request)
    # The response is re-parsed from its string form; element 1 is used as
    # the JSON body.
    parsed = ast.literal_eval(str(response))
    res = parsed[1]
    obj = json.loads(res)
    # Only float samples are kept (non-float entries such as null are dropped).
    values = [x for x in obj["values"] if isinstance(x, float)]
    if not values:
        logger.debug("No float value returned for %s on %s",
                     metric, node_name)
        return None
    mean = sum(values) / len(values)
    return mean
def _munin_server(server, clients):
    """Install the monitoring service munin on the server.

    Must be executed inside Grid'5000 to be able to resolve the server and
    clients IP.

    :param server: a execo.Host

    :param clients: a list of execo.Hosts
    """
    logger.info(
        'Munin monitoring service installation, server = %s, clients = \n %s',
        server.address, [host.address for host in clients])
    logger.debug('Configuring munin server %s', style.host(server.address))
    # DEBIAN_FRONTEND is the variable read by debconf to disable prompts.
    cmd = 'export DEBIAN_FRONTEND=noninteractive ; apt-get update && apt-get install -y munin'
    SshProcess(cmd, server).run()

    logger.debug('Creating configuration files for server')
    fd, server_conf = mkstemp(dir='/tmp/', prefix='munin-nodes_')
    with fdopen(fd, 'w') as f:
        for host in clients:
            # The 4th space-separated field of the `host` output is used as IP.
            get_ip = Process('host ' + host.address).run()
            ip = get_ip.stdout.strip().split(' ')[3]
            f.write('[' + host.address + ']\n address ' + ip +
                    '\n use_node_name yes\n\n')

    Put([server], [server_conf], remote_location='/etc/').run()
    SshProcess('cd /etc && cp ' + server_conf.split('/')[-1] + ' munin.conf',
               server).run()
    Process('rm ' + server_conf).run()
def _start_disk_copy(self, disks=None, backing_file_dir='/tmp'):
    """Start copying the backing files that the hosts do not already hold.

    For each disk, the local md5sum is compared with the md5sum of
    ``<backing_file_dir>/orig_<basename>`` on every host; a file-put action
    is queued when any host differs. The resulting action is stored in
    ``self.copy_actions``.

    :param disks: backing files to treat; defaults to self.backing_files
    :param backing_file_dir: remote directory holding the disks
    """
    pending_puts = []
    if not disks:
        disks = self.backing_files
    for bf in disks:
        logger.info('Treating ' + style.emph(bf))
        logger.debug("Checking frontend disk vs host disk")
        raw_disk = '%s/orig_' % backing_file_dir + bf.split('/')[-1]
        local_sum = Process('md5sum -b ' + bf).run()
        disk_hash = local_sum.stdout.split(' ')[0]
        cmd = 'if [ -f ' + raw_disk + ' ]; ' + \
            'then md5sum -b ' + raw_disk + '; fi'
        remote_sums = self.fact.get_remote(cmd, self.hosts).run()
        # Stops at the first host whose checksum differs.
        disk_ok = all(p.stdout.split(' ')[0] == disk_hash
                      for p in remote_sums.processes)
        if not disk_ok:
            pending_puts.append(
                self.fact.get_fileput(self.hosts, [bf],
                                      remote_location="%s" % backing_file_dir))
            continue
        logger.info("Disk " + style.emph(bf) +
                    " is already present, skipping copy")
    if pending_puts:
        self.copy_actions = ParallelActions(pending_puts).start()
    else:
        # Nothing to copy: a trivial action is run instead.
        self.copy_actions = Remote('ls', self.hosts[0]).run()
def get_server_iface(server):
    """Return the name of the default network interface of the server.

    The value is the 5th field of the ``default`` line of ``ip route``,
    read over SSH.
    """
    logger.debug('Retrieving default interface from %s',
                 style.host(server.address))
    route_cmd = 'ip route |grep default |cut -d " " -f 5'
    route_proc = SshProcess(route_cmd, server)
    route_proc.run()
    iface = route_proc.stdout.strip()
    return iface
def _libvirt_bridged_network(self, bridge):
    """Replace the libvirt 'default' network by a bridged one on all hosts.

    An XML description is generated locally, pushed to /root/ on the hosts,
    then the existing 'default' network is destroyed and the new one is
    defined, started and set to autostart.

    :param bridge: name of the bridge used by the network
    """
    import os

    logger.detail('Configuring libvirt network')
    # Creating an XML file describing the network
    root = Element('network')
    name = SubElement(root, 'name')
    name.text = 'default'
    SubElement(root, 'forward', attrib={'mode': 'bridge'})
    SubElement(root, 'bridge', attrib={'name': bridge})
    fd, network_xml = mkstemp(dir='/tmp/', prefix='create_br_')
    with fdopen(fd, 'w') as f:
        f.write(prettify(root))
    try:
        logger.debug('Destroying existing network')
        destroy = self.fact.get_remote('virsh net-destroy default; ' +
                                       'virsh net-undefine default',
                                       self.hosts)
        put = TaktukPut(self.hosts, [network_xml], remote_location='/root/')
        start = self.fact.get_remote(
            'virsh net-define /root/' +
            network_xml.split('/')[-1] + ' ; ' +
            'virsh net-start default; virsh net-autostart default;',
            self.hosts)
        netconf = SequentialActions([destroy, put, start]).run()
    finally:
        # mkstemp leaves deletion to the caller; the local copy is no longer
        # needed once the actions have run.
        os.remove(network_xml)
    self._actions_hosts(netconf)
def add_host(self, host, data=None):
    """Add a host in the graph, provided it has a usable network adapter.

    :param host: a Host or a string corresponding to the node name

    :param data: a dict containing the Grid'5000 host attributes"""
    _host = get_host_shortname(host.address) if isinstance(host, Host) \
        else host
    power, cores = (data['performance']['core_flops'],
                    data['architecture']['nb_cores']) if data else (0, 0)

    # Hosts without any adapter are reported and left out of the graph.
    if not len(self.get_host_adapters(_host)) > 0:
        logger.warning('Node %s has no valid network connection', _host)
        return

    logger.debug('Adding %s', style.host(_host))
    node_attrs = {'kind': 'node', 'power': power, 'cores': cores}
    self.add_node(_host, node_attrs)
    for adapter in self.get_host_adapters(_host):
        if not adapter['mounted']:
            continue
        self.add_equip(adapter['switch'], get_host_site(_host))
def add_host(self, host, data=None):
    """Add a host in the graph and prune equipment leaves afterwards.

    The node is added even when it has no usable adapter; a warning is
    logged in that case.

    :param host: a Host or a string corresponding to the node name

    :param data: a dict containing the Grid'5000 host attributes"""
    _host = get_host_shortname(host.address) if isinstance(host, Host) \
        else host
    power, cores = (data['performance']['core_flops'],
                    data['architecture']['nb_cores']) if data else (0, 0)

    if not self.get_host_adapters(_host):
        logger.warning('Node %s has no valid network connection', _host)

    logger.debug('Adding %s', style.host(_host))
    node_attrs = {'kind': 'node', 'power': power, 'cores': cores}
    self.add_node(_host, node_attrs)
    for adapter in self.get_host_adapters(_host):
        if not adapter['mounted']:
            continue
        self.add_equip(adapter['switch'], get_host_site(_host))
    self._filter_equip_leaves()
def _filter_equip_leaves(self):
    """Repeatedly remove switch/router/virtual nodes that lead nowhere.

    A node of kind 'switch', 'router' or 'virtual' is removed when it has
    fewer than two neighbors, or when all its neighbors are its own
    linecards (kind 'linecard', name starting with '<node>-lc') that each
    have fewer than two neighbors; in the latter case those linecards are
    removed too. The scan restarts after every removal and stops once a
    full pass removes nothing.
    """
    finished = False
    while not finished:
        finished = True
        for n in self.nodes():
            if self.node[n]['kind'] in ['switch', 'router', 'virtual']:
                # Leaf equipment: fewer than two neighbors.
                if len(self.neighbors(n)) < 2:
                    logger.debug('removing %s' % (n, ))
                    self.remove_node(n)
                    # The graph changed: leave the for loop and rescan.
                    finished = False
                    break
                # Equipment is removable unless one neighbor is either not
                # one of its own linecards or a linecard with >= 2 neighbors.
                to_remove = True
                for nb in self.neighbors(n):
                    if self.node[nb][
                            'kind'] != 'linecard' or not nb.startswith(
                                n + '-lc'):
                        to_remove = False
                    else:
                        if len(self.neighbors(nb)) >= 2:
                            to_remove = False
                if to_remove:
                    # Remove the equipment together with its own linecards.
                    removed = [
                        x for x in self.neighbors(n)
                        if self.node[x]['kind'] == 'linecard'
                        and x.startswith(n + '-lc')
                    ]
                    removed.append(n)
                    for r in removed:
                        logger.debug('removing %s' % (r, ))
                        self.remove_node(r)
                    finished = False
                    break
def _munin_server(server, clients):
    """Install the monitoring service munin on the server.

    Must be executed inside Grid'5000 to be able to resolve the server and
    clients IP.

    :param server: a execo.Host

    :param clients: a list of execo.Hosts
    """
    logger.info('Munin monitoring service installation, server = %s, clients = \n %s',
                server.address, [host.address for host in clients])

    logger.debug('Configuring munin server %s', style.host(server.address))
    # debconf reads DEBIAN_FRONTEND to run without interactive prompts.
    cmd = 'export DEBIAN_FRONTEND=noninteractive ; apt-get update && apt-get install -y munin'
    SshProcess(cmd, server).run()

    logger.debug('Creating configuration files for server')
    fd, server_conf = mkstemp(dir='/tmp/', prefix='munin-nodes_')
    with fdopen(fd, 'w') as f:
        for host in clients:
            # The 4th space-separated field of the `host` output is used as IP.
            get_ip = Process('host '+host.address).run()
            ip = get_ip.stdout.strip().split(' ')[3]
            f.write('['+host.address+']\n address '+ip+'\n use_node_name yes\n\n')

    Put([server], [server_conf], remote_location='/etc/').run()
    SshProcess('cd /etc && cp '+server_conf.split('/')[-1]+' munin.conf', server).run()
    Process('rm '+server_conf).run()
def start_servers(self):
    """Build and initialise the SeD on every server.

    Compiles the executables in ``sched_dir``, rewrites the agent name in
    server.cfg for the current site, runs set_sed.sh and removes the job
    log files left in /root/dietg/log.
    """
    servers = list(self.servers)
    logger.info("Initialize the SeD")
    logger.debug("Compile the executables")
    build_cmd = "cd "+sched_dir+"; make clean && make"
    build = Remote(build_cmd, servers, connection_params = root_connection_params).run()
    for proc in build.processes:
        logger.debug(proc.stdout)
    site = self.site
    setup_cmds = (
        "sed -i 's/LA_"+site+"/MA1/g' /root/dietg/cfgs/server.cfg;",
        "cd /root/dietg/; ./set_sed.sh",
        "if [ -e /root/dietg/log/total.jobs ]; then rm /root/dietg/log/total.jobs; fi",
        "if [ -e /root/dietg/log/current.jobs ]; then rm /root/dietg/log/current.jobs; fi",
    )
    # The commands are run one after the other, each on all servers.
    for setup_cmd in setup_cmds:
        Remote(setup_cmd, servers, connection_params = root_connection_params).run()
    logger.info("Done!")
def __init__(self, elements=None):
    """Create the :func:`~nx.MultiGraph` representing Grid'5000 network
    topology

    :param elements: a site, cluster or host name, or a list of them, whose
      topology is added to the graph"""
    logger.debug('Initializing g5k_graph')
    super(g5k_graph, self).__init__()
    self.data = get_api_data()
    first_backbone = self.data['network']['backbone'][0]
    self.graph['api_commit'] = first_backbone['version']
    self.graph['date'] = format_date(time())

    if not elements:
        return
    if is_string(elements):
        elements = [elements]
    for e in elements:
        # Sites are tried first, then clusters, then hosts.
        if e in get_g5k_sites():
            self.add_site(e, self.data['sites'][e])
            continue
        if e in get_g5k_clusters():
            self.add_cluster(e, self.data['clusters'][e])
            continue
        e = get_host_shortname(e)
        if e in get_g5k_hosts():
            self.add_host(e, self.data['hosts'][e])
    # The backbone is only added when more than one site is present.
    if len(self.get_sites()) > 1:
        self.add_backbone()
def _libvirt_bridged_network(self, bridge):
    """Replace the libvirt 'default' network by a bridged one on all hosts.

    An XML description is generated locally, pushed to /root/ on the hosts,
    then the existing 'default' network is destroyed and the new one is
    defined, started and set to autostart.

    :param bridge: name of the bridge used by the network
    """
    import os

    logger.detail('Configuring libvirt network')
    # Creating an XML file describing the network
    root = Element('network')
    name = SubElement(root, 'name')
    name.text = 'default'
    SubElement(root, 'forward', attrib={'mode': 'bridge'})
    SubElement(root, 'bridge', attrib={'name': bridge})
    fd, network_xml = mkstemp(dir='/tmp/', prefix='create_br_')
    with fdopen(fd, 'w') as f:
        f.write(prettify(root))
    try:
        logger.debug('Destroying existing network')
        destroy = self.fact.get_remote(
            'virsh net-destroy default; ' + 'virsh net-undefine default',
            self.hosts)
        put = TaktukPut(self.hosts, [network_xml], remote_location='/root/')
        start = self.fact.get_remote(
            'virsh net-define /root/' +
            network_xml.split('/')[-1] + ' ; ' +
            'virsh net-start default; virsh net-autostart default;',
            self.hosts)
        netconf = SequentialActions([destroy, put, start]).run()
    finally:
        # mkstemp leaves deletion to the caller; the local copy is no longer
        # needed once the actions have run.
        os.remove(network_xml)
    self._actions_hosts(netconf)
def __init__(self, elements=None):
    """Create the :func:`~nx.MultiGraph` representing Grid'5000 network
    topology

    :param elements: a site, cluster or host (name or Host), or a list of
      them, whose topology is added to the graph"""
    logger.debug('Initializing g5k_graph')
    super(g5k_graph, self).__init__()
    self.data = get_api_data()
    first_backbone = self.data['network']['backbone'][0]
    self.graph['api_commit'] = first_backbone['version']
    self.graph['date'] = format_date(time())

    if not elements:
        return
    if isinstance(elements, str):
        elements = [elements]
    for e in elements:
        if isinstance(e, Host):
            e = get_host_shortname(e.address)
        # Only the part before the first dot is looked up.
        e = e.split('.')[0]
        # The three lookups are independent: a name is added for every
        # category it belongs to.
        if e in get_g5k_sites():
            self.add_site(e, self.data['sites'][e])
        if e in get_g5k_clusters():
            self.add_cluster(e, self.data['clusters'][e])
        if e in get_g5k_hosts():
            self.add_host(e, self.data['hosts'][e])
    if len(self.get_sites()) > 1:
        self.add_backbone()
def _start_disk_copy(self, disks=None, backing_file_dir='/tmp'):
    """Start copying the backing files that the hosts do not already hold.

    For each disk, the local md5sum is compared with the md5sum of
    ``<backing_file_dir>/orig_<basename>`` on every host; a file-put action
    is queued when any host differs. The resulting action is stored in
    ``self.copy_actions``.

    :param disks: backing files to treat; defaults to self.backing_files
    :param backing_file_dir: remote directory holding the disks; the
      default keeps the historical '/tmp' location
    """
    disks_copy = []
    if not disks:
        disks = self.backing_files
    for bf in disks:
        logger.info('Treating ' + style.emph(bf))
        logger.debug("Checking frontend disk vs host disk")
        raw_disk = backing_file_dir + '/orig_' + bf.split('/')[-1]
        f_disk = Process('md5sum -b ' + bf).run()
        disk_hash = f_disk.stdout.split(' ')[0]
        # Prints nothing when the file is absent, which never matches a hash.
        cmd = 'if [ -f ' + raw_disk + ' ]; ' + \
            'then md5sum -b ' + raw_disk + '; fi'
        h_disk = self.fact.get_remote(cmd, self.hosts).run()
        disk_ok = True
        for p in h_disk.processes:
            if p.stdout.split(' ')[0] != disk_hash:
                disk_ok = False
                break
        if disk_ok:
            logger.info("Disk " + style.emph(bf) +
                        " is already present, skipping copy")
        else:
            disks_copy.append(
                self.fact.get_fileput(self.hosts, [bf],
                                      remote_location=backing_file_dir))
    if len(disks_copy) > 0:
        self.copy_actions = ParallelActions(disks_copy).start()
    else:
        # Nothing to copy: a trivial action is run instead.
        self.copy_actions = Remote('ls', self.hosts[0]).run()
def _libvirt_uniquify(self):
    """Give each host its own libvirt host_uuid and restart libvirtd.

    A fresh uuid is generated on every host with ``uuidgen`` and written to
    the host_uuid line of /etc/libvirt/libvirtd.conf.
    """
    logger.detail('Making libvirt host unique')
    uniquify_cmd = ''.join([
        'uuid=`uuidgen` ',
        '&& sed -i "s/.*host_uuid.*/host_uuid=\\"${uuid}\\"/g" ',
        '/etc/libvirt/libvirtd.conf ',
        '&& service libvirtd restart',
    ])
    logger.debug(uniquify_cmd)
    action = self.fact.get_remote(uniquify_cmd, self.hosts)
    action.run()
def rm_equip(self, equip):
    """Remove an equipment from the graph.

    When the equipment is a router, every remaining node whose name
    contains the equipment name is removed as well.
    """
    logger.debug('Removing equip %s', style.host(equip))
    self.remove_node(equip)
    equip_kind = get_network_equipment_attributes(equip)['kind']
    if equip_kind != 'router':
        return
    linecards = [node for node in self.nodes() if equip in node]
    logger.debug('Removing router linecard %s', ' '.join(linecards))
    self.remove_nodes_from(linecards)
def rm_host(self, host):
    """Remove the host from the graph, and the equipments it leaves empty.

    After the node is removed, each switch listed in the host adapters is
    removed too when ``_equip_has_nodes`` reports it has no node left.
    """
    logger.debug('Removing host %s', style.host(host))
    self.remove_node(host)
    for adapter in self.get_host_adapters(host):
        switch = adapter['switch']
        if self._equip_has_nodes(switch):
            continue
        logger.debug('Removing equip %s', adapter['switch'])
        self.rm_equip(adapter['switch'])
def get_server_ip(host):
    """Return the IP of a host, taken from the output of the `host` command.

    :param host: a Host or an address string
    :return: 4th space-separated field of the command output, stripped
    """
    address = host.address if isinstance(host, Host) else host
    logger.debug('Retrieving IP from %s', style.host(address))
    lookup = Process('host ' + address + " |cut -d ' ' -f 4")
    # The pipe requires the command to go through a shell.
    lookup.shell = True
    lookup.run()
    return lookup.stdout.strip()
def start_vms(vms):
    """Return an action to start the VMs on the hosts.

    The ``virsh start`` commands of all VMs sharing a host are concatenated
    into a single command string for that host.
    """
    # NOTE(review): the name `hosts_cmds` appears inside the '{{...}}'
    # template below, presumably resolved by TaktukRemote against this
    # function's variables - do not rename it without checking.
    hosts_cmds = {}
    for vm in vms:
        start_cmd = 'virsh --connect qemu:///system start ' + vm['id'] + ' ; '
        target = vm['host']
        if target in hosts_cmds:
            hosts_cmds[target] = hosts_cmds[target] + start_cmd
        else:
            hosts_cmds[target] = start_cmd

    logger.debug(pformat(hosts_cmds))
    return TaktukRemote('{{hosts_cmds.values()}}', list(hosts_cmds.keys()))
def find_coorm_slot(slots, resources_wanted):
    """Return the first slot offering enough of every wanted resource.

    For each element, the slot must hold at least
    ``cpu * (stop - start) / 3600``.

    :param slots: iterable of (start, stop, resources) tuples
    :param resources_wanted: dict mapping element name to the wanted amount
    :return: the matching (start, stop, resources) tuple, or None
    """
    for slot in slots:
        start, stop, res = slot
        logger.debug("%s %s %s" % (format_date(start), format_date(stop),
                                   res))
        fits = True
        # Every element is examined (and logged) even after a failure.
        for element in resources_wanted:
            cpu = resources_wanted[element]
            logger.debug("%s %s" % (element, cpu))
            if res[element] < cpu * (stop - start) / 3600:
                fits = False
        if not fits:
            continue
        return start, stop, res
def add_vms(vms, server):
    """Append the 'ip id' entries of the virtual machines to the server's
    /etc/hosts.

    /etc/hosts is first restored from /etc/hosts.bak when that backup
    exists (otherwise the backup is created), so repeated calls do not
    accumulate entries.

    :param vms: list of dicts with at least the 'ip' and 'id' keys
    :param server: host whose /etc/hosts is updated
    """
    logger.debug('Adding the VM in /etc/hosts ...')
    fd, vms_list = mkstemp(dir='/tmp/', prefix='vms_')
    # The context manager closes the descriptor even if the write fails.
    with fdopen(fd, 'w') as f:
        f.write('\n' + '\n'.join([vm['ip'] + ' \t ' + vm['id']
                                  for vm in vms]))
    Put([server], [vms_list], remote_location='/etc/').run()
    SshProcess('[ -f /etc/hosts.bak ] && cp /etc/hosts.bak /etc/hosts || ' +
               ' cp /etc/hosts /etc/hosts.bak', server).run()
    Remote('cat /etc/' + vms_list.split('/')[-1] + ' >> /etc/hosts',
           [server]).run()
    Process('rm ' + vms_list).run()
def handle_starttag(self, tag, attrs):
    """Download the song referenced by a matching <input> start tag.

    A tag is handled when its name contains "input", a matching result is
    pending (``current_counter`` > 0) and ``nb_results`` does not exceed
    ``max_results``. The download URL is the value of the third attribute
    when it contains "ww8" or "ww9" and the value of the second attribute
    contains "url". The file is fetched with wget into
    ``destination_path`` and then renamed.

    :param tag: tag name
    :param attrs: list of (name, value) attribute pairs
    """
    import subprocess

    if not ("input" in tag and self.current_counter > 0
            and self.nb_results <= self.max_results):
        return
    logger.debug("Encountered a start tag: %s", tag)
    # The attributes are addressed by position; tags with fewer than three
    # attributes, or valueless attributes, cannot match.
    if len(attrs) < 3:
        return
    marker = attrs[1][1] or ""
    candidate = attrs[2][1] or ""
    # ww8 / ww9 can be used to detect the downloading URL
    if not (("ww8" in candidate or "ww9" in candidate) and "url" in marker):
        return

    url = candidate
    logger.debug("URL of the song")
    logger.debug(url)
    self.current_counter -= 1
    try:
        os.mkdir(self.destination_path)
    except OSError:
        logger.info("The folder %s already exists", self.destination_path)

    tmp_path = self.destination_path + "/" + "XY"
    # The URL comes from a remote page: pass it as a separate argument, with
    # no shell involved, so it cannot inject commands or wget options.
    subprocess.call(["wget", "-O", tmp_path, "--", url])

    # We make a choice between the original name of the file or the search
    # terms.
    # NOTE(review): 'keep_original' selects the search terms and the other
    # branch the page text - looks inverted, confirm with the caller.
    if 'keep_original' in self.naming:
        final_name = self.artist.title() + " - " + self.title.title() + ".mp3"
    else:
        final_name = self.data.title() + ".mp3"
    os.rename(tmp_path, self.destination_path + "/" + final_name)
    return
def handle_data(self, data):
    """Record a text node that mentions the searched song.

    The text is lower-cased and must contain "<artist> - <title>". It is
    ignored when it also contains an excluded word that is not part of the
    song string; otherwise both counters are incremented and the text is
    kept in ``self.data``.
    """
    lowered = data.lower()
    song = self.artist + " - " + self.title
    if song not in lowered:
        return
    for word in self.exclude_list:
        unwanted = word in lowered and word not in song
        if unwanted:
            logger.debug(
                "Encountered some exclude term in the results. Skip it. (-%s- was found)",
                word)
            return
    logger.debug("Encountered some data: %s", lowered)
    self.nb_results += 1
    self.current_counter += 1
    self.data = lowered
def get_host_adapters(self, host):
    """Return the mountable network interfaces from a host.

    An adapter is kept when it has a non-empty 'switch', is not a
    management interface, is mountable and its interface is 'Ethernet'.

    :param host: key of the host in ``self.data['hosts']``
    :return: list of adapter dicts; empty when the host is unknown or its
      description is malformed
    """
    try:
        if host in self.data['hosts']:
            return [m for m in self.data['hosts'][host]['network_adapters']
                    if 'switch' in m
                    and not m['management']
                    and m['mountable']
                    and m['switch']
                    and m['interface'] == 'Ethernet']
    except Exception:
        logger.warning("Wrong description for host %s" % style.host(host))
        # The failure may be precisely that this data is missing, so it is
        # fetched defensively to keep the handler itself from raising.
        try:
            adapters = self.data['hosts'][host]['network_adapters']
        except (KeyError, TypeError, AttributeError):
            adapters = None
        logger.debug("host's network_adapters = %s" % (adapters,))

    return []
def get_host_adapters(self, host):
    """Return the mountable network interfaces from a host.

    An adapter is kept when it has a non-empty 'switch', is not a
    management interface, is mountable and its interface is 'Ethernet'.

    :param host: key of the host in ``self.data['hosts']``
    :return: list of adapter dicts; empty when the host is unknown or its
      description is malformed
    """
    try:
        if host in self.data['hosts']:
            return [
                m for m in self.data['hosts'][host]['network_adapters']
                if 'switch' in m and not m['management'] and m['mountable']
                and m['switch'] and m['interface'] == 'Ethernet'
            ]
    except Exception:
        logger.warning("Wrong description for host %s" % style.host(host))
        # The failure may be precisely that this data is missing, so it is
        # fetched defensively to keep the handler itself from raising.
        try:
            adapters = self.data['hosts'][host]['network_adapters']
        except (KeyError, TypeError, AttributeError):
            adapters = None
        logger.debug("host's network_adapters = %s" % (adapters, ))

    return []
def _add_xml_elements(self):
    """Add sites, clusters, hosts to self.state.

    A site and a cluster with id 'unknown' are always appended after the
    real ones. Hosts that are not Grid'5000 hosts are skipped.
    """
    _state = self.state
    logger.debug('Initial state \n %s', prettify(_state))
    for site in self.sites:
        SubElement(_state, 'site', attrib={'id': site})
    # Placeholder site, added whatever the content of self.sites.
    el_site = SubElement(_state, 'site', attrib={'id': 'unknown'})
    logger.debug('Sites added \n %s', prettify(_state))
    for cluster in self.clusters:
        el_site = _state.find("./site[@id='" + get_cluster_site(cluster)
                              + "']")
        SubElement(el_site, 'cluster', attrib={'id': cluster})
    # Placeholder cluster: attached to the site of the last cluster, or to
    # the 'unknown' site when there is no cluster.
    el_cluster = SubElement(el_site, 'cluster', attrib={'id': 'unknown'})
    logger.debug('Clusters added \n %s', prettify(_state))
    hosts_attr = get_CPU_RAM_FLOPS(self.hosts)
    for host in self.hosts:
        if host in get_g5k_hosts():
            el_cluster = _state.find(".//cluster/[@id='" +
                                     get_host_cluster(host) + "']")
            SubElement(el_cluster, 'host',
                       attrib={'id': host,
                               'state': 'Undeployed',
                               'cpu': str(hosts_attr[host]['CPU']),
                               'mem': str(hosts_attr[host]['RAM'])})
    logger.debug('Hosts added \n %s', prettify(_state))
def _add_xml_elements(self):
    """Populate self.state with site, cluster and host elements.

    A site and a cluster with id 'unknown' are always appended after the
    real ones. Hosts that are not Grid'5000 hosts are skipped.
    """
    state = self.state
    logger.debug('Initial state \n %s', prettify(state))

    for site_id in self.sites:
        SubElement(state, 'site', attrib={'id': site_id})
    # Placeholder site, added whatever the content of self.sites.
    el_site = SubElement(state, 'site', attrib={'id': 'unknown'})
    logger.debug('Sites added \n %s', prettify(state))

    for cluster_id in self.clusters:
        site_query = "./site[@id='" + get_cluster_site(cluster_id) + "']"
        el_site = state.find(site_query)
        SubElement(el_site, 'cluster', attrib={'id': cluster_id})
    # Placeholder cluster: attached to the site of the last cluster, or to
    # the 'unknown' site when there is no cluster.
    el_cluster = SubElement(el_site, 'cluster', attrib={'id': 'unknown'})
    logger.debug('Clusters added \n %s', prettify(state))

    hosts_attr = get_CPU_RAM_FLOPS(self.hosts)
    for host in self.hosts:
        if host not in get_g5k_hosts():
            continue
        cluster_query = ".//cluster/[@id='" + get_host_cluster(host) + "']"
        el_cluster = state.find(cluster_query)
        host_attrib = {
            'id': host,
            'state': 'Undeployed',
            'cpu': str(hosts_attr[host]['CPU']),
            'mem': str(hosts_attr[host]['RAM'])
        }
        SubElement(el_cluster, 'host', attrib=host_attrib)
    logger.debug('Hosts added \n %s', prettify(state))
def add_vms(vms, server):
    """Append the 'ip id' entries of the virtual machines to the server's
    /etc/hosts.

    /etc/hosts is first restored from /etc/hosts.bak when that backup
    exists (otherwise the backup is created), so repeated calls do not
    accumulate entries.

    :param vms: list of dicts with at least the 'ip' and 'id' keys
    :param server: host whose /etc/hosts is updated
    """
    logger.debug('Adding the VM in /etc/hosts ...')
    fd, vms_list = mkstemp(dir='/tmp/', prefix='vms_')
    # The context manager closes the descriptor even if the write fails.
    with fdopen(fd, 'w') as f:
        f.write('\n' + '\n'.join([vm['ip'] + ' \t ' + vm['id']
                                  for vm in vms]))
    Put([server], [vms_list], remote_location='/etc/').run()
    SshProcess(
        '[ -f /etc/hosts.bak ] && cp /etc/hosts.bak /etc/hosts || ' +
        ' cp /etc/hosts /etc/hosts.bak', server).run()
    Remote('cat /etc/' + vms_list.split('/')[-1] + ' >> /etc/hosts',
           [server]).run()
    Process('rm ' + vms_list).run()
def get_kavlan_network(kavlan, site):
    """Retrieve the network parameters for a given kavlan from the API.

    The vlans are read from the first network equipment of the site that
    declares more than two of them.

    :param kavlan: kavlan identifier, matched as 'kavlan-<kavlan>'
    :param site: site name
    :return: (network, mask_size) strings, or (None, None) when no
      equipment or no vlan matches
    """
    logger.debug(str(kavlan) + ' on site ' + site)
    network, mask_size = None, None
    equips = get_resource_attributes('/sites/' + site + '/network_equipments/')
    # Stays empty when no equipment qualifies, so the lookup below is a
    # no-op instead of failing on an unbound name.
    all_vlans = {}
    for equip in equips['items']:
        if 'vlans' in equip and len(equip['vlans']) > 2:
            all_vlans = equip['vlans']
            break
    wanted_name = 'kavlan-' + str(kavlan)
    # values() works on both Python 2 and 3, unlike itervalues().
    for info in all_vlans.values():
        if isinstance(info, dict) and 'name' in info \
                and info['name'] == wanted_name:
            network, _, mask_size = info['addresses'][0].partition('/')
    logger.debug('network=%s, mask_size=%s', network, mask_size)
    return network, mask_size
def get_kavlan_network(kavlan, site):
    """Retrieve the network parameters for a given kavlan from the API.

    The vlans are read from the first network equipment of the site that
    declares more than two of them.

    :param kavlan: kavlan identifier, matched as 'kavlan-<kavlan>'
    :param site: site name
    :return: (network, mask_size) strings, or (None, None) when no
      equipment or no vlan matches
    """
    logger.debug(str(kavlan) + ' on site ' + site)
    network, mask_size = None, None
    equips = get_resource_attributes('/sites/' + site +
                                     '/network_equipments/')
    # Stays empty when no equipment qualifies, so the lookup below is a
    # no-op instead of failing on an unbound name.
    all_vlans = {}
    for equip in equips['items']:
        if 'vlans' in equip and len(equip['vlans']) > 2:
            all_vlans = equip['vlans']
            break
    wanted_name = 'kavlan-' + str(kavlan)
    # values() works on both Python 2 and 3, unlike itervalues().
    for info in all_vlans.values():
        if isinstance(info, dict) and 'name' in info \
                and info['name'] == wanted_name:
            network, _, mask_size = info['addresses'][0].partition('/')
    logger.debug('network=%s, mask_size=%s', network, mask_size)
    return network, mask_size
def get_hosts_list(self, hosts_str):
    """Generate a list of hosts from the given string.

    Args:
      hosts_str (str): The following options are supported

        - The path of the file containing the hosts to be used. Each host
          should be in a different line. Repeated hosts are pruned and
          blank lines are ignored. Hint: in a running Grid5000 job,
          $OAR_NODEFILE should be used.

        - A comma-separated list of site:job_id

        - A comma-separated list of hosts.

        - An oargrid_job_id

    Return:
      list of Host: The list of hosts.
    """
    hosts = []
    if os.path.isfile(hosts_str):
        # The context manager closes the file once it has been read.
        with open(hosts_str) as hosts_file:
            for line in hosts_file:
                name = line.rstrip()
                if not name:
                    # A blank (typically trailing) line is not a host.
                    continue
                h = Host(name)
                if h not in hosts:
                    hosts.append(h)
    elif ':' in hosts_str:
        # We assume the string is a comma separated list of site:job_id
        for job in hosts_str.split(','):
            site, job_id = job.split(':')
            hosts += get_oar_job_nodes(int(job_id), site)
    elif "," in hosts_str:
        # We assume the string is a comma separated list of hosts
        for hstr in hosts_str.split(','):
            h = Host(hstr.rstrip())
            if h not in hosts:
                hosts.append(h)
    elif hosts_str.isdigit():
        # If the file_name is a number, we assume this is a oargrid_job_id
        hosts = get_oargrid_job_nodes(int(hosts_str))
    else:
        # If not any of the previous, we assume is a single-host cluster
        # where the given input is the only host
        hosts = [Host(hosts_str.rstrip())]

    logger.debug('Hosts list: \n%s',
                 ' '.join(style.host(host.address.split('.')[0])
                          for host in hosts))
    return hosts
def activate_vms(vms, dest='lyon.grid5000.fr'):
    """Ping a destination from every VM, through its host, to update ARP
    tables.

    :param vms: list of dicts with at least the 'host' key
    :param dest: address pinged from the virtual machines
    :return: the ``ok`` attribute of the remote action
    """
    logger.info('Executing ping from virtual machines on hosts')
    list_vms = "VMS=`virsh list | grep -v State | grep -v -e '----' | awk '{print $2}'`; "
    ping_each = "for VM in $VMS; do " + \
        " ssh $VM \"ping -c 3 " + dest + " \"; " + \
        "done"
    cmd = list_vms + ping_each
    logger.debug('Launching ping probes to update ARP tables with %s', cmd)
    unique_hosts = list(set([vm['host'] for vm in vms]))
    activate = TaktukRemote(cmd, unique_hosts)
    # Level 10 and below: process output is echoed on stdout.
    verbose = logger.getEffectiveLevel() <= 10
    for proc in activate.processes:
        proc.nolog_exit_code = True
        proc.ignore_exit_code = True
        if verbose:
            proc.stdout_handlers.append(sys.stdout)
    activate.run()
    return activate.ok
def _get_bridge(self, hosts):
    """Return, for each host, the first bridge listed by ``brctl show``.

    :param hosts: hosts to query
    :return: dict mapping each process host to the bridge name, or to None
      when the command printed nothing
    """
    logger.debug('Retrieving bridge on hosts %s', ", ".join(list(hosts)))
    cmd = "brctl show |grep -v 'bridge name' | awk '{ print $1 }' |head -1"
    probe = self.fact.get_remote(cmd, hosts)
    probe.nolog_exit_code = True
    probe.run()
    hosts_br = {}
    for proc in probe.processes:
        bridge_name = proc.stdout.strip()
        hosts_br[proc.host] = bridge_name if bridge_name else None
    return hosts_br
def list_vm(hosts, not_running=False):
    """Return a dict whose keys are the hosts and whose values are the
    lists of VMs ({'id': name}) found on them with ``virsh list``.

    :param hosts: hosts to query
    :param not_running: when True, ``--all`` is passed to virsh
    """
    cmd = 'virsh --connect qemu:///system list' + \
        (' --all' if not_running else '')
    logger.debug('Listing Virtual machines on ' + pformat(hosts))
    listing = TaktukRemote(cmd, hosts).run()
    hosts_vms = dict((host, []) for host in hosts)
    for proc in listing.processes:
        for line in proc.stdout.split('\n'):
            # Only the lines giving a VM state are parsed; the VM name is
            # the second column.
            if not ('running' in line or 'shut off' in line):
                continue
            columns = line.split()
            hosts_vms[proc.host.address].append({'id': columns[1]})
    logger.debug(pformat(hosts_vms))
    return hosts_vms
def create_disks(vms):
    """Return an action to create the disks for the VMs on the hosts.

    The disk creation commands of all VMs sharing a host are concatenated
    into a single command string for that host.

    :param vms: list of dicts with at least 'id', 'host' and 'real_file'
    """
    # Sort the ids rather than the dicts: dicts have no ordering.
    logger.detail(', '.join(sorted(vm['id'] for vm in vms)))
    # NOTE(review): the name `hosts_cmds` appears inside the '{{...}}'
    # template below, presumably resolved by TaktukRemote against this
    # function's variables - do not rename it without checking.
    hosts_cmds = {}

    for vm in vms:
        if vm['real_file']:
            cmd = cmd_disk_real(vm)
        else:
            cmd = cmd_disk_qcow2(vm)
        logger.detail(vm['id'] + ': ' + cmd)
        hosts_cmds[vm['host']] = hosts_cmds.get(vm['host'], '') + cmd

    logger.debug(pformat(hosts_cmds.values()))

    return TaktukRemote('{{hosts_cmds.values()}}', list(hosts_cmds.keys()))
def dnsmasq_server(server, clients=None, vms=None, dhcp=True):
    """Configure a DNS/DHCP server with dnsmasq.

    :param server: host where the server will be installed

    :param clients: list of hosts that will be declared in dnsmasq
      (None is treated as no client)

    :param vms: list of virtual machines (None is treated as no VM)

    :param dhcp: when True and VMs are given, also generate the DHCP
      configuration
    """
    clients = clients or []
    vms = vms or []
    logger.debug('Installing and configuring a DNS/DHCP server on %s', server)

    test_running = Process('nmap ' + server + ' -p 53 | grep domain')
    test_running.shell = True
    test_running.run()
    if 'open' in test_running.stdout:
        logger.info('DNS server already running, updating configuration')
    else:
        # debconf reads DEBIAN_FRONTEND to run without interactive prompts.
        cmd = 'killall dnsmasq; export DEBIAN_FRONTEND=noninteractive ; ' + \
            'apt-get update ; apt-get -y purge dnsmasq-base ; ' + \
            'apt-get install -t wheezy -o Dpkg::Options::="--force-confdef" ' + \
            '-o Dpkg::Options::="--force-confnew" ' + \
            '-y dnsmasq; echo 1 > /proc/sys/net/ipv4/ip_forward '
        SshProcess(cmd, server).run()

    # Sites of the clients (when known) plus the site of the server.
    sites = set([get_host_site(server)])
    for client in clients:
        client_site = get_host_site(client)
        if client_site:
            sites.add(client_site)
    sites = list(sites)

    add_vms(vms, server)
    if clients:
        kill_dnsmasq = TaktukRemote('killall dnsmasq', clients)
        for p in kill_dnsmasq.processes:
            p.ignore_exit_code = p.nolog_exit_code = True
        kill_dnsmasq.run()
        resolv_conf(server, clients, sites)

    # The DHCP range is derived from the VMs, so it needs at least one.
    if dhcp and vms:
        sysctl_conf(server, vms)
        dhcp_conf(server, vms, sites)

    # NOTE(review): the restart is done whatever the value of dhcp so that
    # the updated hosts file is always reloaded - confirm this is intended.
    logger.debug('Restarting service ...')
    # Must be a plain string: a trailing comma here would build a tuple.
    cmd = 'service dnsmasq stop ; rm /var/lib/misc/dnsmasq.leases ; ' + \
        'service dnsmasq start'
    SshProcess(cmd, server).run()
def start_MA(self):
    """Build the executables on the Master Agent node and launch it.

    The build is run to completion; set_masternode.sh is only started, and
    the stdout available at that point is logged.
    """
    master = self.MA
    logger.info("Initialize Master Agent on node %s",master)
    logger.debug("Compile the executables")
    build_cmd = "cd "+sched_dir+"; make clean && make"
    build = Remote(build_cmd, master, connection_params = root_connection_params).run()
    for proc in build.processes:
        # The build stderr is read but not used.
        pout = proc.stderr
    logger.info("Chosen scheduler is : %s",self.scheduler)
    launch_cmd = "cd /root/dietg/; ./set_masternode.sh"
    launch = Remote(launch_cmd, master, connection_params = root_connection_params).start()
    for proc in launch.processes:
        logger.info(proc.stdout)
    logger.info("Done!")
def dnsmasq_server(server, clients=None, vms=None, dhcp=True):
    """Configure a DNS/DHCP server with dnsmasq.

    :param server: host where the server will be installed

    :param clients: list of hosts that will be declared in dnsmasq
      (None is treated as no client)

    :param vms: list of virtual machines (None is treated as no VM)

    :param dhcp: when True and VMs are given, also generate the DHCP
      configuration
    """
    clients = clients or []
    vms = vms or []
    logger.debug('Installing and configuring a DNS/DHCP server on %s', server)

    test_running = Process('nmap ' + server + ' -p 53 | grep domain')
    test_running.shell = True
    test_running.run()
    if 'open' in test_running.stdout:
        logger.info('DNS server already running, updating configuration')
    else:
        # debconf reads DEBIAN_FRONTEND to run without interactive prompts.
        cmd = 'killall dnsmasq; export DEBIAN_FRONTEND=noninteractive ; ' + \
            'apt-get update ; apt-get -y purge dnsmasq-base ; ' + \
            'apt-get install -t wheezy -o Dpkg::Options::="--force-confdef" ' + \
            '-o Dpkg::Options::="--force-confnew" ' + \
            '-y dnsmasq; echo 1 > /proc/sys/net/ipv4/ip_forward '
        SshProcess(cmd, server).run()

    # Sites of the clients (when known) plus the site of the server.
    sites = set([get_host_site(server)])
    for client in clients:
        client_site = get_host_site(client)
        if client_site:
            sites.add(client_site)
    sites = list(sites)

    add_vms(vms, server)
    if clients:
        kill_dnsmasq = TaktukRemote('killall dnsmasq', clients)
        for p in kill_dnsmasq.processes:
            p.ignore_exit_code = p.nolog_exit_code = True
        kill_dnsmasq.run()
        resolv_conf(server, clients, sites)

    # The DHCP range is derived from the VMs, so it needs at least one.
    if dhcp and vms:
        sysctl_conf(server, vms)
        dhcp_conf(server, vms, sites)

    # NOTE(review): the restart is done whatever the value of dhcp so that
    # the updated hosts file is always reloaded - confirm this is intended.
    logger.debug('Restarting service ...')
    # Must be a plain string: a trailing comma here would build a tuple.
    cmd = 'service dnsmasq stop ; rm /var/lib/misc/dnsmasq.leases ; ' + \
        'service dnsmasq start'
    SshProcess(cmd, server).run()
def start_clients(self):
    """Build and set up the client, then run the task distribution.

    The pause handed to ``task_distribution`` depends on ``self.exp_size``
    ("small": 8, "regular": 90, "big": 910, anything else: 10). The
    elapsed time is stored in ``self.makespan``.

    :return: (start, end) timestamps around the task distribution
    """
    clients = [self.clients]
    servers = list(self.servers)
    logger.info("Initialize client on node %s",clients)
    setup_cmds = ("cd "+sched_dir+"; make clean && make",
                  "cd /root/dietg/; ./set_client.sh")
    # Both steps are run on the clients and their output is logged.
    for setup_cmd in setup_cmds:
        action = Remote(setup_cmd, clients, connection_params = root_connection_params).run()
        for proc in action.processes:
            logger.debug(proc.stdout)
    cmd = "cd "+sched_dir+"; ./client_"+self.exp_size
    start = time.time()
    pause_by_size = {"small": 8, "regular": 90, "big": 910}
    pause = pause_by_size.get(self.exp_size, 10)
    self.task_distribution(len(self.servers),pause,cmd,work_rate = 2)
    end = time.time()
    self.makespan = (end - start)
    logger.info("Done, check the logs!")
    return start,end
def add_backbone(self):
    """Add the nodes corresponding to Renater equipments.

    Every backbone equipment is added as a 'renater' node and linked to the
    'renater-' ports of its linecards. Renater nodes that are on no
    shortest path between the routers of two sites are then removed.
    """
    logger.debug('Add %s network', style.emph('Renater'))
    backbone = self.data['network']['backbone']
    for equip in backbone:
        src = equip['uid']
        self.add_node(src, kind='renater')
        for lc in equip['linecards']:
            for port in lc['ports']:
                # Only ports whose parsed uid contains 'renater-' are linked.
                if 'uid' in port and 'renater-' in _parse_port_uid(
                        port['uid']):
                    # Port values take precedence over the linecard rate,
                    # the arbitrary latency and the 'renater' kind.
                    port_bw = lc['rate'] if 'rate' not in port else port[
                        'rate']
                    latency = port['latency'] if 'latency' in port \
                        else arbitrary_latency
                    kind = 'renater' if 'kind' not in port else port['kind']
                    dst = _parse_port_uid(port['uid'])
                    logger.debug('* %s (%s, bw=%s, lat=%s)', dst, kind,
                                 port_bw, latency)
                    self.add_node(dst, kind=kind)
                    # An existing src-dst edge is left untouched.
                    if not self.has_edge(src, dst):
                        self.add_edge(src,
                                      dst,
                                      _unique_link_key(src, dst),
                                      bandwidth=port_bw,
                                      latency=latency,
                                      active=True)
    # Removing unused one
    logger.debug('Removing unused Renater equipments')
    used_elements = []
    # Collect the nodes with 'renater' in their name found on the shortest
    # path between the first routers of every ordered pair of sites.
    for site in self.get_sites():
        dests = [s for s in self.get_sites() if s != site]
        for dest in dests:
            gw_src = self.get_site_router(site)[0]
            gw_dst = self.get_site_router(dest)[0]
            if not gw_src is None and not gw_dst is None:
                for element in [
                        el for el in nx.shortest_path(self, gw_src, gw_dst)
                        if 'renater' in el
                ]:
                    if element not in used_elements:
                        used_elements.append(element)

    # Drop every node of kind 'renater' that was not collected above.
    for element, _ in [
            n for n in self.nodes_iter(data=True)
            if n[1]['kind'] == 'renater'
    ]:
        if element not in used_elements:
            logger.debug('removing %s' % (element, ))
            self.remove_node(element)
def dhcp_conf(server, vms, sites):
    """Generate the dnsmasq.conf with dhcp parameters and put it on the
    server.

    The DHCP range goes from the IP of the first VM to the IP of the last
    one, so ``vms`` must not be empty.

    :param server: host receiving /etc/dnsmasq.conf
    :param vms: list of dicts with the 'ip', 'mac' and 'id' keys
    :param sites: site names added to the domain-search option
    """
    logger.debug('Creating dnsmasq.conf')
    dhcp_lease = 'dhcp-lease-max=10000\n'
    dhcp_range = 'dhcp-range=' + vms[0]['ip'] + ',' + vms[-1]['ip'] + \
        ',12h\n'
    dhcp_router = 'dhcp-option=option:router,' + get_server_ip(server) + '\n'
    dhcp_hosts = '\n'.join('dhcp-host=' + ':' + vm['mac'] + ',' + vm['id'] +
                           ',' + vm['ip'] for vm in vms)
    dhcp_option = 'dhcp-option=option:domain-search,grid5000.fr,' + \
        ','.join([site + '.grid5000.fr' for site in sites]) + '\n'
    fd, dnsmasq = mkstemp(dir='/tmp/', prefix='dnsmasq_')
    # The context manager closes the descriptor even if the write fails.
    with fdopen(fd, 'w') as f:
        f.write(dhcp_lease + dhcp_range + dhcp_router + dhcp_hosts + '\n' +
                dhcp_option)
    Put([server], [dnsmasq], remote_location='/etc/').run()
    SshProcess('cd /etc && cp ' + dnsmasq.split('/')[-1]+' dnsmasq.conf',
               server).run()
    Process('rm ' + dnsmasq).run()
def get_CPU_RAM_FLOPS(hosts):
    """Return the CPU count, RAM amount and flops of each host, plus totals.

    Host attributes are fetched once per cluster (through the first host
    seen for that cluster) and reused for the other hosts of the cluster.

    :param hosts: iterable of host addresses or ``Host`` objects

    :return: dict mapping each host address to a dict with the ``'CPU'``
      (``smt_size``), ``'RAM'`` (``ram_size`` divided by 10**6) and
      ``'flops'`` keys -- the same dict object is shared by all hosts of a
      cluster -- plus a ``'TOTAL'`` entry summing ``'CPU'`` and ``'RAM'``
      over the given hosts
    """
    hosts_attr = {'TOTAL': {'CPU': 0, 'RAM': 0}}
    cluster_attr = {}
    for host in hosts:
        if isinstance(host, Host):
            host = host.address
        cluster = get_host_cluster(host)
        if cluster not in cluster_attr:
            attr = get_host_attributes(host)
            cluster_attr[cluster] = {
                'CPU': attr['architecture']['smt_size'],
                'RAM': int(attr['main_memory']['ram_size'] / 10 ** 6),
                'flops': attr['performance']['node_flops']}
        # Accumulate from the attributes cached for *this* host's cluster.
        # The raw `attr` variable may still hold another cluster's data
        # when hosts of several clusters are interleaved.
        current = cluster_attr[cluster]
        hosts_attr[host] = current
        hosts_attr['TOTAL']['CPU'] += current['CPU']
        hosts_attr['TOTAL']['RAM'] += current['RAM']
    logger.debug(hosts_list(hosts_attr))
    return hosts_attr
def get_CPU_RAM_FLOPS(hosts):
    """Return the core count, RAM amount and flops of each host, plus totals.

    Host attributes are fetched once per cluster (through the first host
    seen for that cluster) and reused for the other hosts of the cluster.

    :param hosts: iterable of host addresses or ``Host`` objects

    :return: dict mapping each host address to a dict with the ``'CPU'``
      (``nb_cores``), ``'RAM'`` (``ram_size`` divided by 10**6) and
      ``'flops'`` keys -- the same dict object is shared by all hosts of a
      cluster -- plus a ``'TOTAL'`` entry summing ``'CPU'`` and ``'RAM'``
      over the given hosts
    """
    hosts_attr = {'TOTAL': {'CPU': 0, 'RAM': 0}}
    cluster_attr = {}
    for host in hosts:
        if isinstance(host, Host):
            host = host.address
        cluster = get_host_cluster(host)
        if cluster not in cluster_attr:
            attr = get_host_attributes(host)
            cluster_attr[cluster] = {
                'CPU': attr['architecture']['nb_cores'],
                'RAM': int(attr['main_memory']['ram_size'] / 10 ** 6),
                'flops': attr['performance']['node_flops']}
        # Accumulate from the attributes cached for *this* host's cluster.
        # The raw `attr` variable may still hold another cluster's data
        # when hosts of several clusters are interleaved.
        current = cluster_attr[cluster]
        hosts_attr[host] = current
        hosts_attr['TOTAL']['CPU'] += current['CPU']
        hosts_attr['TOTAL']['RAM'] += current['RAM']
    logger.debug(hosts_list(hosts_attr))
    return hosts_attr
def get_oar_job_vm5k_resources(jobs):
    """Build the per-site resources dict needed by vm5k_deployment.

    For each ``(oar_job_id, site)`` pair, wait for the job to start, then
    collect the addresses of its nodes and its (ip, mac) list.  When the job
    subnets provide no (ip, mac) entry, the job kavlan is looked up instead
    and, if one is found, its (ip, mac) list is used.

    :param jobs: iterable of ``(oar_job_id, site)`` pairs

    :return: dict mapping each site to a dict with the ``'hosts'``,
      ``'ip_mac'`` (with its first 300 entries dropped) and ``'kavlan'``
      keys; ``'kavlan'`` is ``None`` when the subnets already provided
      addresses
    """
    resources = {}
    for job, site in jobs:
        logger.detail('Retrieving resources from %s:%s',
                      style.emph(site), job)
        job_id = int(job)
        wait_oar_job_start(job_id, site)

        logger.debug('Retrieving hosts')
        nodes = get_oar_job_nodes(job_id, site)
        hosts = [node.address for node in nodes]

        logger.debug('Retrieving subnet')
        ip_mac, _ = get_oar_job_subnets(job_id, site)

        kavlan = None
        if len(ip_mac) == 0:
            # No subnet addresses: fall back on the kavlan of the job.
            logger.debug('Retrieving kavlan')
            kavlan = get_oar_job_kavlan(job_id, site)
            if kavlan:
                assert len(kavlan) == 1
                kavlan = kavlan[0]
                ip_mac = get_kavlan_ip_mac(kavlan, site)

        site_resources = dict(hosts=hosts, ip_mac=ip_mac[300:],
                              kavlan=kavlan)
        resources[site] = site_resources
    return resources
def get_oar_job_vm5k_resources(jobs):
    """Collect, site by site, the resources dict expected by vm5k_deployment.

    Each ``(oar_job_id, site)`` pair is processed in turn: the function
    waits for the job to start, lists the addresses of its nodes and reads
    its (ip, mac) list from the job subnets.  If that list is empty, the
    job kavlan is queried and, when one is returned, the (ip, mac) list of
    that kavlan is used.

    :param jobs: iterable of ``(oar_job_id, site)`` pairs

    :return: dict keyed by site; each value holds ``'hosts'``, ``'ip_mac'``
      (the list minus its first 300 entries) and ``'kavlan'`` (``None``
      when no kavlan lookup was needed)
    """
    resources = {}
    for raw_id, site in jobs:
        logger.detail('Retrieving resources from %s:%s',
                      style.emph(site), raw_id)
        numeric_id = int(raw_id)
        wait_oar_job_start(numeric_id, site)

        logger.debug('Retrieving hosts')
        hosts = []
        for node in get_oar_job_nodes(numeric_id, site):
            hosts.append(node.address)

        logger.debug('Retrieving subnet')
        ip_mac, _ = get_oar_job_subnets(numeric_id, site)

        kavlan = None
        if len(ip_mac) == 0:
            # Empty subnet list: try the kavlan attached to the job.
            logger.debug('Retrieving kavlan')
            kavlan = get_oar_job_kavlan(numeric_id, site)
            if kavlan:
                assert len(kavlan) == 1
                kavlan = kavlan[0]
                ip_mac = get_kavlan_ip_mac(kavlan, site)

        entry = {}
        entry['hosts'] = hosts
        entry['ip_mac'] = ip_mac[300:]
        entry['kavlan'] = kavlan
        resources[site] = entry
    return resources
def wait_hosts_up(hosts, timeout=300):
    """Scan the hosts with nmap until all are reported up or the timeout expires.

    The host names are written to a temporary file under ``/tmp/`` which is
    fed to nmap on every iteration; the file is removed once polling stops,
    and the function then sleeps 3 seconds before returning.

    :param hosts: iterable of host addresses or ``Host`` objects

    :param timeout: polling stops once ``Timer.elapsed()`` reaches this
      value (presumably seconds -- confirm against ``Timer``)

    :return: ``True`` if every host was reported up, ``False`` otherwise
    """
    # A real list is required: it is joined, measured with len() and mutated
    # below, which a one-shot ``map`` iterator would not support.
    down_hosts = [x.address if isinstance(x, Host) else x for x in hosts]
    fd, hosts_file = mkstemp(dir='/tmp/', prefix='hosts_')
    with fdopen(fd, 'w') as f:
        f.write('\n' + '\n'.join(down_hosts))
    timer = Timer()
    while len(down_hosts) > 0 and timer.elapsed() < timeout:
        nmap = Process("nmap -v -oG - -i %s -p 22 |grep Host|grep Status" %
                       (hosts_file, ), shell=True).run()
        logger.debug('timer: %s \nnmap output: \n%s', timer.elapsed(),
                     nmap.stdout.strip())
        for line in nmap.stdout.strip().split('\n'):
            fields = line.split()
            # Skip empty/short lines (no grep match) and hosts that nmap
            # reports with a status other than Up (verbose grepable output
            # also lists "Status: Down" hosts).
            if len(fields) < 3 or 'Up' not in fields:
                continue
            s = fields[2]
            host = s[s.find("(") + 1:s.find(")")]
            if host in down_hosts:
                logger.detail('%s is up', host)
                down_hosts.remove(host)
    Process('rm ' + hosts_file).run()
    sleep(3)
    return len(down_hosts) == 0
def get_hosts_list(self, hosts_str):
    """Generate a list of hosts from a hosts file or a hosts string.

    Args:
      hosts_str (str): The following options are supported

        - The path of the file containing the hosts to be used. Each host
          should be in a different line. Repeated hosts are pruned and
          blank lines are ignored.
          Hint: in a running Grid5000 job, $OAR_NODEFILE should be used.
        - A comma-separated list of hosts. Repeated hosts are pruned and
          empty items are ignored.
        - Anything else is taken as the name of a single host.

    Return:
      list of Host: The list of hosts.
    """
    if os.path.isfile(hosts_str):
        # Read through a context manager so the file is closed promptly.
        with open(hosts_str) as hosts_file:
            names = [line.strip() for line in hosts_file]
    elif "," in hosts_str:
        # We assume the string is a comma separated list of hosts
        names = [item.strip() for item in hosts_str.split(',')]
    else:
        names = None

    if names is None:
        # If not any of the previous, we assume is a single-host cluster
        # where the given input is the only host
        hosts = [Host(hosts_str.rstrip())]
    else:
        hosts = []
        for name in names:
            if not name:
                # Blank line or empty item: not a host.
                continue
            h = Host(name)
            if h not in hosts:
                hosts.append(h)

    logger.debug('Hosts list: \n%s',
                 ' '.join(style.host(host.address.split('.')[0])
                          for host in hosts))
    return hosts