예제 #1
0
파일: utils.py 프로젝트: lpouillo/vm5k
def get_oar_job_vm5k_resources(jobs):
    """Build the resources dict needed by vm5k_deployment from OAR jobs.

    :param jobs: iterable of ``(oar_job_id, site)`` pairs.

    For every job, wait for it to start, then collect the addresses of
    its nodes and its (ip, mac) list. When the job has no subnet, the
    (ip, mac) list of its kavlan is used instead.

    Returns a dict keyed by site whose values hold the keys ``'hosts'``,
    ``'ip_mac'`` and ``'kavlan'``. A site listed several times keeps only
    the data of its last job.
    """
    resources = {}
    for job_id, site in jobs:
        logger.detail('Retrieving resources from %s:%s',
                      style.emph(site), job_id)
        job_id = int(job_id)
        wait_oar_job_start(job_id, site)

        logger.debug('Retrieving hosts')
        nodes = get_oar_job_nodes(job_id, site)
        addresses = [node.address for node in nodes]

        logger.debug('Retrieving subnet')
        ip_mac, _unused = get_oar_job_subnets(job_id, site)

        vlan = None
        if len(ip_mac) == 0:
            # No subnet in the job: fall back on a kavlan, if any
            logger.debug('Retrieving kavlan')
            vlan = get_oar_job_kavlan(job_id, site)
            if vlan:
                assert len(vlan) == 1
                vlan = vlan[0]
                ip_mac = get_kavlan_ip_mac(vlan, site)

        # NOTE(review): the first 300 (ip, mac) entries are skipped; the
        # reason is not visible from this function.
        resources[site] = dict(hosts=addresses,
                               ip_mac=ip_mac[300:],
                               kavlan=vlan)
    return resources
예제 #2
0
파일: utils.py 프로젝트: badock/vm5k
def wait_hosts_down(hosts, timeout=300):
    """Poll the hosts with nmap until all of them are reported down.

    :param hosts: iterable of host names and/or ``Host`` objects (for the
      latter the ``address`` attribute is used).

    :param timeout: polling stops once ``Timer.elapsed()`` reaches this
      value (presumably seconds -- TODO confirm against Timer).

    Returns True if every host has been seen down before the timeout,
    False otherwise.
    """
    timer = Timer()
    # Build a real list: it is joined, measured with len() and shrunk with
    # remove() below, none of which works on the lazy object that map()
    # returns on Python 3.
    up_hosts = [x.address if isinstance(x, Host) else x for x in hosts]
    fd, hosts_file = mkstemp(dir='/tmp/', prefix='hosts_')
    try:
        with fdopen(fd, 'w') as f:
            f.write('\n' + '\n'.join(up_hosts))

        while len(up_hosts) > 0 and timer.elapsed() < timeout:
            nmap = Process("nmap -v -oG - -i %s -p 22 |grep Host|grep Status" %
                           (hosts_file, ), shell=True).run()
            logger.debug('timer: %s \nnmap output: \n%s', timer.elapsed(),
                         nmap.stdout.strip())
            for line in nmap.stdout.strip().split('\n'):
                if 'Down' in line:
                    # The second token of the line is used as IP address
                    ip = line.split()[1]
                    get_host = Process('host ' + ip + '| cut -f 5 -d " "',
                                       shell=True).run()
                    # Last character dropped (presumably the trailing dot
                    # of the name printed by `host` -- TODO confirm)
                    host = get_host.stdout.strip()[0:-1]
                    if host in up_hosts:
                        logger.detail(host + ' is down')
                        up_hosts.remove(host)
    finally:
        # Remove the temporary hosts file even if the polling raised
        Process('rm ' + hosts_file).run()

    return len(up_hosts) == 0
예제 #3
0
파일: deployment.py 프로젝트: lpouillo/vm5k
    def _enable_bridge(self, name='br0'):
        """Make sure every host has a network bridge called `name`.

        We need a bridge to have automatic DHCP configuration for the VM.
        A bridge with another name is destroyed. Hosts left without a
        bridge receive a script that rewrites /etc/network/interfaces and
        brings the bridge up; they are then polled with nmap (at most 20
        tries, 20 seconds apart) until all of them answer.

        :param name: name of the bridge that must exist on the hosts.
        """
        logger.detail('Configuring the bridge')
        hosts_br = self._get_bridge(self.hosts)
        nobr_hosts = []
        # items() exists on Python 2 and 3, unlike iteritems()
        for host, br in hosts_br.items():
            if br is None:
                logger.debug('No bridge on host %s', style.host(host))
                nobr_hosts.append(host)
            elif br != name:
                logger.debug('Wrong bridge on host %s, destroying it',
                             style.host(host))
                SshProcess('ip link set ' + br + ' down ; brctl delbr ' + br,
                           host).run()
                nobr_hosts.append(host)
            else:
                # Log the bridge name itself, not the literal string 'name'
                logger.debug('Bridge %s is present on host %s',
                             style.emph(name), style.host(host))

        # A real list is needed (len() and join() below), not a lazy map()
        nobr_hosts = [x.address if isinstance(x, Host) else x
                      for x in nobr_hosts]

        if len(nobr_hosts) > 0:
            logger.debug('Creating bridge on %s', hosts_list(nobr_hosts))
            script = (
                'export br_if=`ip route |grep default |cut -f 5 -d " "`; \n' +
                'ifdown $br_if ; \n' +
                'sed -i "s/$br_if inet dhcp/$br_if inet manual/g" /etc/network/interfaces ; \n' +
                'sed -i "s/auto $br_if//g" /etc/network/interfaces ; \n' +
                'echo " " >> /etc/network/interfaces ; \n' +
                'echo "auto ' + name + '" >> /etc/network/interfaces ; \n' +
                'echo "iface ' + name + ' inet dhcp" >> /etc/network/interfaces ; \n' +
                'echo "  bridge_ports $br_if" >> /etc/network/interfaces ; \n' +
                'echo "  bridge_stp off" >> /etc/network/interfaces ; \n' +
                'echo "  bridge_maxwait 0" >> /etc/network/interfaces ; \n' +
                'echo "  bridge_fd 0" >> /etc/network/interfaces ; \n' +
                'ifup ' + name)
            fd, br_script = mkstemp(dir='/tmp/', prefix='create_br_')
            with fdopen(fd, 'w') as f:
                f.write(script)

            self.fact.get_fileput(nobr_hosts, [br_script]).run()
            # The script is run through its base name on the hosts
            self.fact.get_remote('nohup sh ' + br_script.split('/')[-1],
                                 nobr_hosts).run()

            logger.debug('Waiting for network restart')
            if_up = False
            nmap_tries = 0
            while (not if_up) and nmap_tries < 20:
                sleep(20)
                nmap_tries += 1
                nmap = Process('nmap ' + ' '.join(nobr_hosts) +
                               ' -p 22').run()
                for line in nmap.stdout.split('\n'):
                    if 'Nmap done' in line:
                        # Third token against sixth token without its '('
                        # (presumably scanned addresses vs hosts up in the
                        # nmap summary line -- TODO confirm)
                        fields = line.split()
                        if_up = fields[2] == fields[5].replace('(', '')
            if if_up:
                logger.debug('Network has been restarted')
            else:
                logger.warning('Network restart not confirmed after %s tries',
                               nmap_tries)
        logger.detail('All hosts have the bridge %s', style.emph(name))
예제 #4
0
def wait_hosts_down(hosts, timeout=300):
    """Poll the hosts with nmap until all of them are reported down.

    :param hosts: iterable of host names and/or ``Host`` objects (for the
      latter the ``address`` attribute is used).

    :param timeout: polling stops once ``Timer.elapsed()`` reaches this
      value (presumably seconds -- TODO confirm against Timer).

    Returns True if every host has been seen down before the timeout,
    False otherwise.
    """
    timer = Timer()
    # Build a real list: it is joined, measured with len() and shrunk with
    # remove() below, none of which works on the lazy object that map()
    # returns on Python 3.
    up_hosts = [x.address if isinstance(x, Host) else x for x in hosts]
    fd, hosts_file = mkstemp(dir='/tmp/', prefix='hosts_')
    try:
        with fdopen(fd, 'w') as f:
            f.write('\n' + '\n'.join(up_hosts))

        while len(up_hosts) > 0 and timer.elapsed() < timeout:
            nmap = Process("nmap -v -oG - -i %s -p 22 |grep Host|grep Status" %
                           (hosts_file, ),
                           shell=True).run()
            logger.debug('timer: %s \nnmap output: \n%s', timer.elapsed(),
                         nmap.stdout.strip())
            for line in nmap.stdout.strip().split('\n'):
                if 'Down' in line:
                    # The second token of the line is used as IP address
                    ip = line.split()[1]
                    get_host = Process('host ' + ip + '| cut -f 5 -d " "',
                                       shell=True).run()
                    # Last character dropped (presumably the trailing dot
                    # of the name printed by `host` -- TODO confirm)
                    host = get_host.stdout.strip()[0:-1]
                    if host in up_hosts:
                        logger.detail(host + ' is down')
                        up_hosts.remove(host)
    finally:
        # Remove the temporary hosts file even if the polling raised
        Process('rm ' + hosts_file).run()

    return len(up_hosts) == 0
예제 #5
0
def get_oar_job_vm5k_resources(jobs):
    """Build the resources dict needed by vm5k_deployment from OAR jobs.

    :param jobs: iterable of ``(oar_job_id, site)`` pairs.

    For every job, wait for it to start, then collect the addresses of
    its nodes and its (ip, mac) list. When the job has no subnet, the
    (ip, mac) list of its kavlan is used instead.

    Returns a dict keyed by site whose values hold the keys ``'hosts'``,
    ``'ip_mac'`` and ``'kavlan'``. A site listed several times keeps only
    the data of its last job.
    """
    resources = {}
    for job_id, site in jobs:
        logger.detail('Retrieving resources from %s:%s', style.emph(site),
                      job_id)
        job_id = int(job_id)
        wait_oar_job_start(job_id, site)

        logger.debug('Retrieving hosts')
        nodes = get_oar_job_nodes(job_id, site)
        addresses = [node.address for node in nodes]

        logger.debug('Retrieving subnet')
        ip_mac, _unused = get_oar_job_subnets(job_id, site)

        vlan = None
        if len(ip_mac) == 0:
            # No subnet in the job: fall back on a kavlan, if any
            logger.debug('Retrieving kavlan')
            vlan = get_oar_job_kavlan(job_id, site)
            if vlan:
                assert len(vlan) == 1
                vlan = vlan[0]
                ip_mac = get_kavlan_ip_mac(vlan, site)

        # NOTE(review): the first 300 (ip, mac) entries are skipped; the
        # reason is not visible from this function.
        resources[site] = dict(hosts=addresses,
                               ip_mac=ip_mac[300:],
                               kavlan=vlan)
    return resources
예제 #6
0
    def _configure_apt(self):
        """Install sources.list, preferences and apt.conf in /etc/apt.

        The three files are written locally as temporary files, pushed to
        /etc/apt/ on the hosts, renamed there to their final names, and
        the local temporary files are finally removed.
        """
        logger.detail('Configuring APT')

        def _dump(prefix, content):
            # Write content in a new temporary file and return its path
            handle, path = mkstemp(dir='/tmp/', prefix=prefix)
            out = fdopen(handle, 'w')
            out.write(content)
            out.close()
            return path

        # Packages repositories
        tmpsource = _dump(
            'sources.list_',
            'deb http://ftp.debian.org/debian wheezy main contrib non-free\n' +
            'deb http://ftp.debian.org/debian wheezy-backports main contrib non-free\n' +
            'deb http://security.debian.org/ wheezy/updates main contrib non-free\n')
        # Pinning priorities
        tmppref = _dump(
            'preferences_',
            'Package: * \nPin: release a=wheezy \nPin-Priority: 900\n\n' +
            'Package: * \nPin: release a=wheezy-backports \nPin-Priority: 875\n\n')
        # APT options
        tmpaptconf = _dump('apt.conf_', 'APT::Acquire::Retries=20;\n')

        local_files = [tmpsource, tmppref, tmpaptconf]
        TaktukPut(self.hosts, local_files,
                  remote_location='/etc/apt/').run()

        # On the hosts the files carry the base name of the local ones
        source_name = tmpsource.split('/')[-1]
        pref_name = tmppref.split('/')[-1]
        aptconf_name = tmpaptconf.split('/')[-1]
        cmd = 'cd /etc/apt && ' + \
            'mv ' + source_name + ' sources.list &&' + \
            'mv ' + pref_name + ' preferences &&' + \
            'mv ' + aptconf_name + ' apt.conf'
        apt_conf = self.fact.get_remote(cmd, self.hosts).run()
        self._actions_hosts(apt_conf)
        Local('rm ' + ' '.join(local_files)).run()
예제 #7
0
파일: deployment.py 프로젝트: badock/vm5k
    def _create_backing_file(self, disks=None, backing_file_dir='/tmp'):
        """Prepare the backing files of the VMs on the hosts.

        Once the disks copy is over (it is started here when there is no
        copy action yet), each copied file is renamed ``orig_<name>`` in
        `backing_file_dir`. Then, for each disk, a working copy is made
        from the ``orig_`` file and a shell command attaches it with
        qemu-nbd, mounts it and adds the host's authorized_keys and
        id_rsa* files to /root/.ssh inside the image.

        :param disks: paths of the disks to prepare; ``self.backing_files``
          is used when empty or None.

        :param backing_file_dir: directory of the hosts holding the files.
        """
        if not self.copy_actions:
            self._start_disk_copy(disks)
        if not self.copy_actions.ended:
            logger.info("Waiting for the end of the disks copy")
            self.copy_actions.wait()
        if isinstance(self.copy_actions, ParallelActions):
            renames = []
            for copy in self.copy_actions.actions:
                fname = copy.local_files[0].split('/')[-1]
                source = "mv %s/" % backing_file_dir + fname
                target = " %s/orig_" % backing_file_dir + fname
                renames.append(self.fact.get_remote(source + target,
                                                    self.hosts))

            ParallelActions(renames).run()

        if not disks:
            disks = self.backing_files
        for bf in disks:
            disk_name = bf.split('/')[-1]
            raw_disk = '%s/orig_' % backing_file_dir + disk_name
            to_disk = '%s/' % backing_file_dir + disk_name
            duplicate = self.fact.get_remote('cp ' + raw_disk + ' ' + to_disk,
                                             self.hosts)
            duplicate.run()
            logger.info('Copying ssh key on ' + to_disk + ' ...')
            cmd = ('modprobe nbd max_part=16; ' +
                   'qemu-nbd --connect=/dev/nbd0 ' + to_disk +
                   ' ; sleep 3 ; partprobe /dev/nbd0 ; ' +
                   'part=`fdisk -l /dev/nbd0 |grep dev|grep Linux| grep -v swap|cut -f 1 -d " "` ; ' +
                   'mount $part /mnt ; mkdir -p /mnt/root/.ssh ; ' +
                   'cat /root/.ssh/authorized_keys >> /mnt/root/.ssh/authorized_keys ; ' +
                   'cp -r /root/.ssh/id_rsa* /mnt/root/.ssh/ ;' +
                   'umount /mnt; qemu-nbd -d /dev/nbd0')
            logger.detail(cmd)
            copy_on_vm_base = self.fact.get_remote(cmd, self.hosts).run()
            self._actions_hosts(copy_on_vm_base)
예제 #8
0
파일: deployment.py 프로젝트: badock/vm5k
    def _libvirt_bridged_network(self, bridge):
        """Redefine the libvirt network 'default' as bridged on `bridge`.

        An XML description of the network is written in a local temporary
        file. On the hosts, the existing 'default' network is destroyed
        and undefined, the XML file is pushed to /root/, and the network
        is defined from it, started and marked as autostart.

        :param bridge: name of the bridge the network is attached to.
        """
        logger.detail('Configuring libvirt network')
        # XML description of the network
        network = Element('network')
        SubElement(network, 'name').text = 'default'
        SubElement(network, 'forward', attrib={'mode': 'bridge'})
        SubElement(network, 'bridge', attrib={'name': bridge})
        fd, network_xml = mkstemp(dir='/tmp/', prefix='create_br_')
        xml_file = fdopen(fd, 'w')
        xml_file.write(prettify(network))
        xml_file.close()

        logger.debug('Destroying existing network')
        # On the hosts the file carries the base name of the local one
        xml_name = network_xml.split('/')[-1]
        steps = [
            self.fact.get_remote('virsh net-destroy default; ' +
                                 'virsh net-undefine default',
                                 self.hosts),
            TaktukPut(self.hosts, [network_xml],
                      remote_location='/root/'),
            self.fact.get_remote('virsh net-define /root/' +
                                 xml_name + ' ; ' +
                                 'virsh net-start default; virsh net-autostart default;',
                                 self.hosts),
        ]
        netconf = SequentialActions(steps).run()

        self._actions_hosts(netconf)
예제 #9
0
    def _libvirt_bridged_network(self, bridge):
        """Redefine the libvirt network 'default' as bridged on `bridge`.

        An XML description of the network is written in a local temporary
        file. On the hosts, the existing 'default' network is destroyed
        and undefined, the XML file is pushed to /root/, and the network
        is defined from it, started and marked as autostart.

        :param bridge: name of the bridge the network is attached to.
        """
        logger.detail('Configuring libvirt network')
        # XML description of the network
        network = Element('network')
        SubElement(network, 'name').text = 'default'
        SubElement(network, 'forward', attrib={'mode': 'bridge'})
        SubElement(network, 'bridge', attrib={'name': bridge})
        fd, network_xml = mkstemp(dir='/tmp/', prefix='create_br_')
        xml_file = fdopen(fd, 'w')
        xml_file.write(prettify(network))
        xml_file.close()

        logger.debug('Destroying existing network')
        # On the hosts the file carries the base name of the local one
        xml_name = network_xml.split('/')[-1]
        steps = [
            self.fact.get_remote(
                'virsh net-destroy default; ' + 'virsh net-undefine default',
                self.hosts),
            TaktukPut(self.hosts, [network_xml], remote_location='/root/'),
            self.fact.get_remote(
                'virsh net-define /root/' + xml_name + ' ; ' +
                'virsh net-start default; virsh net-autostart default;',
                self.hosts),
        ]
        netconf = SequentialActions(steps).run()

        self._actions_hosts(netconf)
예제 #10
0
    def _enable_bridge(self, name='br0'):
        """Make sure every host has a network bridge called `name`.

        We need a bridge to have automatic DHCP configuration for the VM.
        A bridge with another name is destroyed. Hosts left without a
        bridge receive a script that rewrites /etc/network/interfaces and
        brings the bridge up; they are then polled with nmap (at most 20
        tries, 20 seconds apart) until all of them answer.

        :param name: name of the bridge that must exist on the hosts.
        """
        logger.detail('Configuring the bridge')
        hosts_br = self._get_bridge(self.hosts)
        nobr_hosts = []
        # items() exists on Python 2 and 3, unlike iteritems()
        for host, br in hosts_br.items():
            if br is None:
                logger.debug('No bridge on host %s', style.host(host))
                nobr_hosts.append(host)
            elif br != name:
                logger.debug('Wrong bridge on host %s, destroying it',
                             style.host(host))
                SshProcess('ip link set ' + br + ' down ; brctl delbr ' + br,
                           host).run()
                nobr_hosts.append(host)
            else:
                # Log the bridge name itself, not the literal string 'name'
                logger.debug('Bridge %s is present on host %s',
                             style.emph(name), style.host(host))

        # A real list is needed (len() and join() below), not a lazy map()
        nobr_hosts = [x.address if isinstance(x, Host) else x
                      for x in nobr_hosts]

        if len(nobr_hosts) > 0:
            logger.debug('Creating bridge on %s', hosts_list(nobr_hosts))
            script = (
                'export br_if=`ip route |grep default |cut -f 5 -d " "`; \n' +
                'ifdown $br_if ; \n' +
                'sed -i "s/$br_if inet dhcp/$br_if inet manual/g" /etc/network/interfaces ; \n' +
                'sed -i "s/auto $br_if//g" /etc/network/interfaces ; \n' +
                'echo " " >> /etc/network/interfaces ; \n' +
                'echo "auto ' + name + '" >> /etc/network/interfaces ; \n' +
                'echo "iface ' + name + ' inet dhcp" >> /etc/network/interfaces ; \n' +
                'echo "  bridge_ports $br_if" >> /etc/network/interfaces ; \n' +
                'echo "  bridge_stp off" >> /etc/network/interfaces ; \n' +
                'echo "  bridge_maxwait 0" >> /etc/network/interfaces ; \n' +
                'echo "  bridge_fd 0" >> /etc/network/interfaces ; \n' +
                'ifup ' + name)
            fd, br_script = mkstemp(dir='/tmp/', prefix='create_br_')
            with fdopen(fd, 'w') as f:
                f.write(script)

            self.fact.get_fileput(nobr_hosts, [br_script]).run()
            # The script is run through its base name on the hosts
            self.fact.get_remote('nohup sh ' + br_script.split('/')[-1],
                                 nobr_hosts).run()

            logger.debug('Waiting for network restart')
            if_up = False
            nmap_tries = 0
            while (not if_up) and nmap_tries < 20:
                sleep(20)
                nmap_tries += 1
                nmap = Process('nmap ' + ' '.join(nobr_hosts) +
                               ' -p 22').run()
                for line in nmap.stdout.split('\n'):
                    if 'Nmap done' in line:
                        # Third token against sixth token without its '('
                        # (presumably scanned addresses vs hosts up in the
                        # nmap summary line -- TODO confirm)
                        fields = line.split()
                        if_up = fields[2] == fields[5].replace('(', '')
            if if_up:
                logger.debug('Network has been restarted')
            else:
                logger.warning('Network restart not confirmed after %s tries',
                               nmap_tries)
        logger.detail('All hosts have the bridge %s', style.emph(name))
예제 #11
0
파일: deployment.py 프로젝트: badock/vm5k
    def _configure_apt(self):
        """Install sources.list, preferences and apt.conf in /etc/apt.

        The repositories and pinning rules target the release named by
        ``self.debian_name``. The three files are written locally as
        temporary files, pushed to /etc/apt/ on the hosts, renamed there
        to their final names, and the local temporary files are finally
        removed.
        """
        logger.detail('Configuring APT')
        release = self.debian_name

        def _dump(prefix, content):
            # Write content in a new temporary file and return its path
            handle, path = mkstemp(dir='/tmp/', prefix=prefix)
            out = fdopen(handle, 'w')
            out.write(content)
            out.close()
            return path

        # Packages repositories
        tmpsource = _dump(
            'sources.list_',
            'deb http://ftp.debian.org/debian %s main contrib non-free\n' % release +
            'deb http://ftp.debian.org/debian %s-backports main contrib non-free\n' % release +
            'deb http://security.debian.org/ %s/updates main contrib non-free\n' % release)
        # Pinning priorities
        tmppref = _dump(
            'preferences_',
            'Package: * \nPin: release a=%s \nPin-Priority: 900\n\n' % release +
            'Package: * \nPin: release a=%s-backports \nPin-Priority: 875\n\n' % release)
        # APT options
        tmpaptconf = _dump('apt.conf_', 'APT::Acquire::Retries=20;\n')

        local_files = [tmpsource, tmppref, tmpaptconf]
        TaktukPut(self.hosts, local_files,
                  remote_location='/etc/apt/').run()

        # On the hosts the files carry the base name of the local ones
        source_name = tmpsource.split('/')[-1]
        pref_name = tmppref.split('/')[-1]
        aptconf_name = tmpaptconf.split('/')[-1]
        cmd = 'cd /etc/apt && ' + \
            'mv ' + source_name + ' sources.list &&' + \
            'mv ' + pref_name + ' preferences &&' + \
            'mv ' + aptconf_name + ' apt.conf'
        apt_conf = self.fact.get_remote(cmd, self.hosts).run()
        self._actions_hosts(apt_conf)
        Local('rm ' + ' '.join(local_files)).run()
예제 #12
0
def __get_site(site):
    """Collect the data of a site using one thread per request.

    Threads are started for the site attributes, the site network and,
    for each cluster of the site, the cluster attributes and the host
    attributes. Once they are all joined, their results are stored on
    the current thread as the attributes ``site_data``,
    ``network_data``, ``cluster_data`` and ``host_data`` (the last two
    being dicts keyed by cluster).
    """
    logger.detail(site)

    def _spawn(target, *args):
        # Start a thread running target(*args) and return it
        worker = threading.Thread(target=target, args=args)
        worker.start()
        return worker

    site_attrs_th = _spawn(__get_site_attrs, site)
    site_network_th = _spawn(__get_site_network, site)
    cluster_attrs_th = {}
    host_attrs_th = {}
    for cluster in _get_site_clusters_uncached(site):
        cluster_attrs_th[cluster] = _spawn(__get_cluster_attrs, site, cluster)
        host_attrs_th[cluster] = _spawn(__get_host_attrs, site, cluster)

    workers = [site_attrs_th, site_network_th]
    workers.extend(cluster_attrs_th.values())
    workers.extend(host_attrs_th.values())
    for worker in workers:
        worker.join()

    # Results are handed over through attributes of the calling thread
    current = threading.currentThread()
    current.site_data = site_attrs_th.site_data
    current.network_data = site_network_th.network_data
    current.cluster_data = {}
    current.host_data = {}
    for cluster in cluster_attrs_th:
        current.cluster_data[cluster] = cluster_attrs_th[cluster].cluster_data
        current.host_data[cluster] = host_attrs_th[cluster].host_data
예제 #13
0
파일: deployment.py 프로젝트: badock/vm5k
 def _libvirt_uniquify(self):
     """Set a freshly generated host_uuid in libvirtd.conf on the hosts.

     The remote command generates a uuid with uuidgen, rewrites the
     host_uuid line of /etc/libvirt/libvirtd.conf with it and restarts
     the libvirtd service.
     """
     logger.detail('Making libvirt host unique')
     cmd = ('uuid=`uuidgen` '
            '&& sed -i "s/.*host_uuid.*/host_uuid=\\"${uuid}\\"/g" '
            '/etc/libvirt/libvirtd.conf '
            '&& service libvirtd restart')
     logger.debug(cmd)
     uniquify = self.fact.get_remote(cmd, self.hosts)
     uniquify.run()
예제 #14
0
 def _libvirt_uniquify(self):
     """Set a freshly generated host_uuid in libvirtd.conf on the hosts.

     The remote command generates a uuid with uuidgen, rewrites the
     host_uuid line of /etc/libvirt/libvirtd.conf with it and restarts
     the libvirtd service.
     """
     logger.detail('Making libvirt host unique')
     cmd = ('uuid=`uuidgen` '
            '&& sed -i "s/.*host_uuid.*/host_uuid=\\"${uuid}\\"/g" '
            '/etc/libvirt/libvirtd.conf '
            '&& service libvirtd restart')
     logger.debug(cmd)
     uniquify = self.fact.get_remote(cmd, self.hosts)
     uniquify.run()
예제 #15
0
 def _libvirt_check_service(self):
     """Make sure the init script /etc/init.d/libvirtd exists on the hosts.

     When it is missing and /etc/init.d/libvirt-bin exists, the latter
     is symlinked as libvirtd. The remote command prints 0 when libvirtd
     is already there and 1 when neither script exists. The finished
     action is handed to ``self._actions_hosts``.
     """
     logger.detail('Checking libvirt service name')
     cmd = ("if [ ! -e /etc/init.d/libvirtd ]; "
            "  then if [ -e /etc/init.d/libvirt-bin ]; "
            "       then ln -s /etc/init.d/libvirt-bin /etc/init.d/libvirtd; "
            "       else echo 1; "
            "        fi; "
            "else echo 0; fi")
     check_libvirt = self.fact.get_remote(cmd, self.hosts)
     check_libvirt = check_libvirt.run()
     self._actions_hosts(check_libvirt)
예제 #16
0
파일: api_utils.py 프로젝트: mickours/execo
def _read_api_cache(cache_dir):
    """Load the cached API data stored as pickle files in cache_dir.

    One file is read per entry name ('network', 'sites', 'clusters',
    'hosts' and 'hierarchy'). Its path is cache_dir directly followed by
    the entry name, so cache_dir is expected to end with a path
    separator.

    Returns a single dict mapping each entry name to the loaded object.
    """
    logger.detail('Reading data from cache ...')
    entries = ('network', 'sites', 'clusters', 'hosts', 'hierarchy')
    cache = {}
    for entry in entries:
        path = cache_dir + entry
        with open(path, 'rb') as cache_file:
            cache[entry] = load(cache_file)
    return cache
예제 #17
0
def _read_api_cache(cache_dir):
    """Load the cached API data stored as pickle files in cache_dir.

    One file is read per entry name ('network', 'sites', 'clusters',
    'hosts' and 'hierarchy'). Its path is cache_dir directly followed by
    the entry name, so cache_dir is expected to end with a path
    separator.

    Returns a single dict mapping each entry name to the loaded object.
    """
    logger.detail('Reading data from cache ...')
    entries = ('network', 'sites', 'clusters', 'hosts', 'hierarchy')
    cache = {}
    for entry in entries:
        path = cache_dir + entry
        with open(path, 'rb') as cache_file:
            cache[entry] = load(cache_file)
    return cache
예제 #18
0
파일: deployment.py 프로젝트: badock/vm5k
 def _libvirt_check_service(self):
     """Make sure the init script /etc/init.d/libvirtd exists on the hosts.

     When it is missing and /etc/init.d/libvirt-bin exists, the latter
     is symlinked as libvirtd. The remote command prints 0 when libvirtd
     is already there and 1 when neither script exists. The finished
     action is handed to ``self._actions_hosts``.
     """
     logger.detail('Checking libvirt service name')
     cmd = ("if [ ! -e /etc/init.d/libvirtd ]; "
            "  then if [ -e /etc/init.d/libvirt-bin ]; "
            "       then ln -s /etc/init.d/libvirt-bin /etc/init.d/libvirtd; "
            "       else echo 1; "
            "        fi; "
            "else echo 0; fi")
     check_libvirt = self.fact.get_remote(cmd, self.hosts)
     check_libvirt = check_libvirt.run()
     self._actions_hosts(check_libvirt)
예제 #19
0
파일: aptcacher.py 프로젝트: badock/vm5k
def configure_apt_proxy(vms):
    """Override apt proxy-guess with server as proxy.

    The VMs are grouped by their ``'host'``. On the VMs of each group
    (reached through their ``'ip'``), /etc/apt/apt.conf.d/proxy-guess is
    overwritten so that APT uses port 9999 of their host as HTTP proxy.
    All the groups are configured in parallel.

    :param vms: iterable of dicts with at least the keys ``'host'`` and
      ``'ip'``.
    """
    hosts_vms = {}
    for vm in vms:
        hosts_vms.setdefault(vm['host'], []).append(vm['ip'])
    conf = []
    # items() exists on Python 2 and 3, unlike iteritems()
    for server, clients in hosts_vms.items():
        server = Host(server)
        logger.detail('Configuring %s as APT proxy for %s',
                      style.host(server.address), ','.join(clients))
        conf.append(TaktukRemote(' echo \'Acquire::http::Proxy \"http://' +
                                 server.address + ':9999" ; \' > /etc/apt/apt.conf.d/proxy-guess',
                                 clients))
    ParallelActions(conf).run()
예제 #20
0
파일: actions.py 프로젝트: sphilippot/vm5k
def create_disks(vms):
    """Return an action to create the disks for the VMs on the hosts.

    The command of each VM comes from ``cmd_disk_real`` when its
    ``'real_file'`` is true, from ``cmd_disk_qcow2`` otherwise. Commands
    of VMs sharing the same ``'host'`` are concatenated.

    :param vms: list of VM dicts; the keys ``'id'``, ``'host'`` and
      ``'real_file'`` are read here.

    Returns a ``TaktukRemote`` over the hosts; it is not started here.
    """
    # Sort the ids themselves: sorting the VM dicts raises TypeError on
    # Python 3, where dicts cannot be ordered.
    logger.detail(', '.join(sorted(vm['id'] for vm in vms)))
    hosts_cmds = {}

    for vm in vms:
        if vm['real_file']:
            cmd = cmd_disk_real(vm)
        else:
            cmd = cmd_disk_qcow2(vm)
        logger.detail(vm['id'] + ': ' + cmd)
        hosts_cmds[vm['host']] = hosts_cmds.get(vm['host'], '') + cmd

    logger.debug(pformat(list(hosts_cmds.values())))

    # The template below names the local variable `hosts_cmds`
    # (presumably evaluated by execo against this function's scope, one
    # value per host -- TODO confirm), so it must keep that name.
    return TaktukRemote('{{hosts_cmds.values()}}', list(hosts_cmds.keys()))
예제 #21
0
파일: actions.py 프로젝트: lpouillo/vm5k
def create_disks(vms):
    """Return an action to create the disks for the VMs on the hosts.

    The command of each VM comes from ``cmd_disk_real`` when its
    ``'real_file'`` is true, from ``cmd_disk_qcow2`` otherwise. Commands
    of VMs sharing the same ``'host'`` are concatenated.

    :param vms: list of VM dicts; the keys ``'id'``, ``'host'`` and
      ``'real_file'`` are read here.

    Returns a ``TaktukRemote`` over the hosts; it is not started here.
    """
    # Sort the ids themselves: sorting the VM dicts raises TypeError on
    # Python 3, where dicts cannot be ordered.
    logger.detail(', '.join(sorted(vm['id'] for vm in vms)))
    hosts_cmds = {}

    for vm in vms:
        if vm['real_file']:
            cmd = cmd_disk_real(vm)
        else:
            cmd = cmd_disk_qcow2(vm)
        logger.detail(vm['id'] + ': ' + cmd)
        hosts_cmds[vm['host']] = hosts_cmds.get(vm['host'], '') + cmd

    logger.debug(pformat(list(hosts_cmds.values())))

    # The template below names the local variable `hosts_cmds`
    # (presumably evaluated by execo against this function's scope, one
    # value per host -- TODO confirm), so it must keep that name.
    return TaktukRemote('{{hosts_cmds.values()}}', list(hosts_cmds.keys()))
예제 #22
0
def configure_apt_proxy(vms):
    """Override apt proxy-guess with server as proxy.

    The VMs are grouped by their ``'host'``. On the VMs of each group
    (reached through their ``'ip'``), /etc/apt/apt.conf.d/proxy-guess is
    overwritten so that APT uses port 9999 of their host as HTTP proxy.
    All the groups are configured in parallel.

    :param vms: iterable of dicts with at least the keys ``'host'`` and
      ``'ip'``.
    """
    hosts_vms = {}
    for vm in vms:
        hosts_vms.setdefault(vm['host'], []).append(vm['ip'])
    conf = []
    # items() exists on Python 2 and 3, unlike iteritems()
    for server, clients in hosts_vms.items():
        server = Host(server)
        logger.detail('Configuring %s as APT proxy for %s',
                      style.host(server.address), ','.join(clients))
        conf.append(
            TaktukRemote(
                ' echo \'Acquire::http::Proxy \"http://' + server.address +
                ':9999" ; \' > /etc/apt/apt.conf.d/proxy-guess', clients))
    ParallelActions(conf).run()
예제 #23
0
파일: actions.py 프로젝트: lpouillo/vm5k
def install_vms(vms):
    """Return an action to install the VM on the hosts.

    A ``virt-install`` command is built for each VM from its ``'id'``,
    ``'mac'``, ``'mem'``, ``'hdd'``, ``'n_cpu'`` and ``'cpuset'``; a tap
    network interface is added when ``'tap'`` is set. Commands of VMs
    sharing the same ``'host'`` are concatenated.

    :param vms: list of VM dicts holding the keys listed above plus
      ``'host'`` and ``'tap'``.

    Returns a ``TaktukRemote`` over the hosts; it is not started here.
    """
    # Sort the ids themselves: sorting the VM dicts raises TypeError on
    # Python 3, where dicts cannot be ordered.
    logger.detail(', '.join(sorted(vm['id'] for vm in vms)))
    hosts_cmds = {}
    for vm in vms:
        cmd = ('virt-install -d --import --connect qemu:///system ' +
               '--nographics --noautoconsole --noreboot --name=' + vm['id'] +
               ' --network network=default,mac=' + vm['mac'] + ' --ram=' +
               str(vm['mem']) + ' --disk path=/tmp/' + vm['id'] +
               '.qcow2,device=disk,bus=virtio,format=qcow2,size=' +
               str(vm['hdd']) + ',cache=none ' +
               '--vcpus=' + str(vm['n_cpu']) + ' --cpuset=' + vm['cpuset'])
        if vm['tap']:
            # The leading space keeps the option apart from the cpuset
            # value that ends the command built above.
            cmd += ' --network tap,script=no,ifname=' + vm['tap']
        cmd += ' ; '
        hosts_cmds[vm['host']] = hosts_cmds.get(vm['host'], '') + cmd

    # The template below names the local variable `hosts_cmds`
    # (presumably evaluated by execo against this function's scope, one
    # value per host -- TODO confirm), so it must keep that name.
    return TaktukRemote('{{hosts_cmds.values()}}', list(hosts_cmds.keys()))
예제 #24
0
파일: actions.py 프로젝트: sphilippot/vm5k
def install_vms(vms):
    """Return an action to install the VM on the hosts.

    A ``virt-install`` command is built for each VM from its ``'id'``,
    ``'mac'``, ``'mem'``, ``'hdd'``, ``'n_cpu'`` and ``'cpuset'``; a tap
    network interface is added when ``'tap'`` is set. Commands of VMs
    sharing the same ``'host'`` are concatenated.

    :param vms: list of VM dicts holding the keys listed above plus
      ``'host'`` and ``'tap'``.

    Returns a ``TaktukRemote`` over the hosts; it is not started here.
    """
    # Sort the ids themselves: sorting the VM dicts raises TypeError on
    # Python 3, where dicts cannot be ordered.
    logger.detail(', '.join(sorted(vm['id'] for vm in vms)))
    hosts_cmds = {}
    for vm in vms:
        cmd = ('virt-install -d --import --connect qemu:///system ' +
               '--nographics --noautoconsole --noreboot --name=' + vm['id'] +
               ' --network network=default,mac=' + vm['mac'] + ' --ram=' +
               str(vm['mem']) + ' --disk path=/tmp/' + vm['id'] +
               '.qcow2,device=disk,bus=virtio,format=qcow2,size=' +
               str(vm['hdd']) + ',cache=none ' +
               '--vcpus=' + str(vm['n_cpu']) + ' --cpuset=' + vm['cpuset'])
        if vm['tap']:
            # The leading space keeps the option apart from the cpuset
            # value that ends the command built above.
            cmd += ' --network tap,script=no,ifname=' + vm['tap']
        cmd += ' ; '
        hosts_cmds[vm['host']] = hosts_cmds.get(vm['host'], '') + cmd

    # The template below names the local variable `hosts_cmds`
    # (presumably evaluated by execo against this function's scope, one
    # value per host -- TODO confirm), so it must keep that name.
    return TaktukRemote('{{hosts_cmds.values()}}', list(hosts_cmds.keys()))
예제 #25
0
파일: api_utils.py 프로젝트: mickours/execo
def _is_cache_old_and_reachable(cache_dir):
    """Tell whether the cache must be refreshed from the API.

    The first line of the ``api_commit`` file of cache_dir is compared
    with the ``'version'`` returned by ``get_resource_attributes('')``.

    Returns True when no cached commit can be read or when it differs
    from the API one; False when both match or when the API cannot be
    queried (the cache is then used as is).
    """
    try:
        with open(cache_dir + 'api_commit') as f:
            local_commit = f.readline()
    except Exception:
        # Best effort: an unreadable cache is treated as outdated
        logger.detail('No commit version found')
        return True
    try:
        api_commit = get_resource_attributes('')['version']
    except Exception:
        logger.warning('Unable to check API, reverting to cache')
        return False
    # Compare with the value fetched above: querying the API a second
    # time here would run outside the try block and could raise.
    if local_commit != api_commit:
        logger.info('Cache is outdated, will retrieve the latest commit')
        return True
    else:
        logger.detail('Already at the latest commit')
        return False
예제 #26
0
파일: planning.py 프로젝트: msimonin/execo
def get_job_by_name(job_name, sites=None):
    """Look for a current job whose name is `job_name`.

    The OAR jobs of the current oargrid jobs are examined first, then
    the current OAR jobs of `sites`.

    :param job_name: name of the job to find.

    :param sites: sites to search for OAR jobs; when empty or None, the
      result of ``get_g5k_sites()`` is used.

    Returns ``(oargrid_job, None)`` when the name belongs to a job of an
    oargrid job, the matching entry of ``get_current_oar_jobs`` as is
    when found there (an entry is indexed as job id then site), and
    ``(None, None)`` when nothing matches.
    """
    logger.detail('Looking for a job named %s', style.emph(job_name))
    if not sites:
        sites = get_g5k_sites()

    grid_jobs = get_current_oargrid_jobs()
    if len(grid_jobs) > 0:
        for grid_job in grid_jobs:
            for oar_job in get_oargrid_job_oar_jobs(grid_job):
                details = get_oar_job_info(oar_job[0], oar_job[1])
                if details['name'] != job_name:
                    continue
                logger.info('Oargridjob %s found !', style.emph(grid_job))
                return grid_job, None

    for oar_job in get_current_oar_jobs(sites):
        details = get_oar_job_info(oar_job[0], oar_job[1])
        if details['name'] != job_name:
            continue
        logger.info('Job %s found on site %s !', style.emph(oar_job[0]),
                    style.host(oar_job[1]))
        return oar_job
    return None, None
예제 #27
0
def _is_cache_old_and_reachable(cache_dir):
    """Tell whether the cache must be refreshed from the API.

    The first line of the ``api_commit`` file of cache_dir is compared
    with the ``'version'`` returned by ``get_resource_attributes('')``.

    Returns True when no cached commit can be read or when it differs
    from the API one; False when both match or when the API cannot be
    queried (the cache is then used as is).
    """
    try:
        with open(cache_dir + 'api_commit') as f:
            local_commit = f.readline()
    except Exception:
        # Best effort: an unreadable cache is treated as outdated
        logger.detail('No commit version found')
        return True
    try:
        api_commit = get_resource_attributes('')['version']
    except Exception:
        logger.warning('Unable to check API, reverting to cache')
        return False
    # Compare with the value fetched above: querying the API a second
    # time here would run outside the try block and could raise.
    if local_commit != api_commit:
        logger.info('Cache is outdated, will retrieve the latest commit')
        return True
    else:
        logger.detail('Already at the latest commit')
        return False
예제 #28
0
def setup_aptcacher_server(hosts, base_dir='/tmp/apt-cacher-ng'):
    """Install and configure apt-cacher-ng on the given servers.

    The package is installed, the log and cache directories are created
    under `base_dir` and given to the apt-cacher-ng user, then
    /etc/apt-cacher-ng/acng.conf is edited (directories, port 3142
    replaced by 9999, upstream proxy http://proxy:3128) and the service
    restarted. An error is logged and the function returns as soon as
    one of these steps fails.

    :param hosts: iterable of hosts, each one being passed to ``Host``.

    :param base_dir: directory of the servers holding the ``log`` and
      ``cache`` directories.
    """
    # A real list: the hosts are iterated several times below, which a
    # lazy map() object (Python 3) would not allow.
    hosts = [Host(host) for host in hosts]
    hosts_str = ','.join([style.host(host.address) for host in hosts])
    logger.info('Installing apt-cacher on %s', hosts_str)
    logger.detail('Package')
    # DEBIAN_FRONTEND is the variable read by debconf to pick its
    # frontend; `noninteractive` keeps the installation from prompting.
    package = TaktukRemote(
        'export DEBIAN_FRONTEND=noninteractive ; apt-get update ; ' +
        'apt-get install -o Dpkg::Options::="--force-confdef" -o ' +
        'Dpkg::Options::="--force-confnew" -y apt-cacher-ng', hosts).run()
    if not package.ok:
        logger.error('Unable to install apt-cacher-ng on %s', hosts_str)
        return

    logger.detail('Directory creation')
    log_dir = base_dir + '/log'
    cache_dir = base_dir + '/cache'
    mkdirs = TaktukRemote(
        'mkdir -p ' + log_dir + '; mkdir -p ' + cache_dir +
        '; chown -R apt-cacher-ng:apt-cacher-ng ' + base_dir, hosts).run()
    if not mkdirs.ok:
        logger.error('Unable to create the directories')
        return
    cmd = 'sed -i "s#/var/cache/apt-cacher-ng#' + cache_dir + \
          '#g" /etc/apt-cacher-ng/acng.conf ;' + \
          'sed -i "s#/var/log/apt-cacher-ng#' + log_dir + '#g" ' + \
          '/etc/apt-cacher-ng/acng.conf ;' + \
          'sed -i "s/3142/9999/g" /etc/apt-cacher-ng/acng.conf ; ' + \
          'sed -i "s?#Proxy: http://www-proxy.example.net:80?Proxy: ' + \
          'http://proxy:3128?g" /etc/apt-cacher-ng/acng.conf ; ' + \
          'service apt-cacher-ng restart'
    configure = TaktukRemote(cmd, hosts).run()
    if not configure.ok:
        logger.error('Unable to configure and restart the service')
        return

    logger.info('apt-cacher-ng up and running on %s', hosts_str)
예제 #29
0
파일: aptcacher.py 프로젝트: badock/vm5k
def setup_aptcacher_server(hosts, base_dir='/tmp/apt-cacher-ng'):
    """Install and configure apt-cacher-ng on the given hosts.

    The package is installed with apt-get, the ``log`` and ``cache``
    directories are created under ``base_dir``, then
    ``/etc/apt-cacher-ng/acng.conf`` is patched with sed (cache directory,
    log directory, port 3142 replaced by 9999, upstream proxy set to
    http://proxy:3128) and the service is restarted.

    :param hosts: iterable of host addresses, each one is wrapped in
      ``Host``

    :param base_dir: directory that receives the ``log`` and ``cache``
      sub-directories

    Nothing is returned: a failing step is reported with ``logger.error``
    and the remaining steps are skipped.
    """
    # Build a real list: the hosts are iterated several times below, which
    # a lazy map() object would not survive.
    hosts = [Host(host) for host in hosts]
    hosts_str = ','.join([style.host(host.address) for host in hosts])
    logger.info('Installing apt-cacher on %s', hosts_str)
    logger.detail('Package')
    # DEBIAN_FRONTEND (not DEBIAN_MASTER) is the variable debconf reads to
    # run without prompting.
    package = TaktukRemote('export DEBIAN_FRONTEND=noninteractive ; apt-get update ; ' +
                           'apt-get install -o Dpkg::Options::="--force-confdef" -o ' +
                           'Dpkg::Options::="--force-confnew" -y apt-cacher-ng',
                           hosts).run()
    if not package.ok:
        logger.error('Unable to install apt-cacher-ng on %s', hosts_str)
        return

    logger.detail('Directory creation')
    log_dir = base_dir + '/log'
    cache_dir = base_dir + '/cache'
    mkdirs = TaktukRemote('mkdir -p ' + log_dir + '; mkdir -p ' + cache_dir +
                          '; chown -R apt-cacher-ng:apt-cacher-ng ' + base_dir,
                          hosts).run()
    if not mkdirs.ok:
        logger.error('Unable to create the directories')
        return

    # Point the daemon to the new directories, move it to port 9999 and
    # enable the upstream proxy before restarting it.
    cmd = 'sed -i "s#/var/cache/apt-cacher-ng#' + cache_dir + \
          '#g" /etc/apt-cacher-ng/acng.conf ;' + \
          'sed -i "s#/var/log/apt-cacher-ng#' + log_dir + '#g" ' + \
          '/etc/apt-cacher-ng/acng.conf ;' + \
          'sed -i "s/3142/9999/g" /etc/apt-cacher-ng/acng.conf ; ' + \
          'sed -i "s?#Proxy: http://www-proxy.example.net:80?Proxy: ' + \
          'http://proxy:3128?g" /etc/apt-cacher-ng/acng.conf ; ' + \
          'service apt-cacher-ng restart'
    configure = TaktukRemote(cmd, hosts).run()
    if not configure.ok:
        logger.error('Unable to configure and restart the service')
        return

    logger.info('apt-cacher-ng up and running on %s', hosts_str)
예제 #30
0
파일: utils.py 프로젝트: badock/vm5k
def wait_hosts_up(hosts, timeout=300):
    """Scan port 22 of the hosts with nmap until all of them are up.

    The host names are written to a temporary file given to nmap, and the
    scan is repeated until every host has been reported up or ``timeout``
    is exceeded.

    :param hosts: iterable of ``Host`` objects or host name strings

    :param timeout: limit compared with ``Timer.elapsed()`` (presumably
      seconds)

    :return: True if all the hosts have been seen up, False otherwise
    """
    # A real list is required: its length is tested and elements are
    # removed, which a lazy map() object does not support.
    down_hosts = [x.address if isinstance(x, Host) else x for x in hosts]
    fd, hosts_file = mkstemp(dir='/tmp/', prefix='hosts_')
    with fdopen(fd, 'w') as f:
        f.write('\n' + '\n'.join(down_hosts))
    timer = Timer()
    while len(down_hosts) > 0 and timer.elapsed() < timeout:
        nmap = Process("nmap -v -oG - -i %s -p 22 |grep Host|grep Status" %
                       (hosts_file, ), shell=True).run()
        logger.debug('timer: %s \nnmap output: \n%s', timer.elapsed(),
                     nmap.stdout.strip())
        for line in nmap.stdout.strip().split('\n'):
            fields = line.split()
            # An empty output gives a single empty line: ignore anything
            # too short to hold the "(hostname)" field.
            if len(fields) < 3:
                continue
            # With -v nmap also lists the hosts that are down, so only
            # the "Status: Up" lines must be taken into account.
            if 'Up' not in fields[3:]:
                continue
            s = fields[2]
            host = s[s.find("(") + 1:s.find(")")]
            if host in down_hosts:
                logger.detail('%s is up', host)
                down_hosts.remove(host)
    Process('rm ' + hosts_file).run()
    sleep(3)
    return len(down_hosts) == 0
예제 #31
0
def wait_hosts_up(hosts, timeout=300):
    """Scan port 22 of the hosts with nmap until all of them are up.

    The host names are written to a temporary file given to nmap, and the
    scan is repeated until every host has been reported up or ``timeout``
    is exceeded.

    :param hosts: iterable of ``Host`` objects or host name strings

    :param timeout: limit compared with ``Timer.elapsed()`` (presumably
      seconds)

    :return: True if all the hosts have been seen up, False otherwise
    """
    # A real list is required: its length is tested and elements are
    # removed, which a lazy map() object does not support.
    down_hosts = [x.address if isinstance(x, Host) else x for x in hosts]
    fd, hosts_file = mkstemp(dir='/tmp/', prefix='hosts_')
    with fdopen(fd, 'w') as f:
        f.write('\n' + '\n'.join(down_hosts))
    timer = Timer()
    while len(down_hosts) > 0 and timer.elapsed() < timeout:
        nmap = Process("nmap -v -oG - -i %s -p 22 |grep Host|grep Status" %
                       (hosts_file, ),
                       shell=True).run()
        logger.debug('timer: %s \nnmap output: \n%s', timer.elapsed(),
                     nmap.stdout.strip())
        for line in nmap.stdout.strip().split('\n'):
            fields = line.split()
            # An empty output gives a single empty line: ignore anything
            # too short to hold the "(hostname)" field.
            if len(fields) < 3:
                continue
            # With -v nmap also lists the hosts that are down, so only
            # the "Status: Up" lines must be taken into account.
            if 'Up' not in fields[3:]:
                continue
            s = fields[2]
            host = s[s.find("(") + 1:s.find(")")]
            if host in down_hosts:
                logger.detail('%s is up', host)
                down_hosts.remove(host)
    Process('rm ' + hosts_file).run()
    sleep(3)
    return len(down_hosts) == 0
예제 #32
0
    def _create_backing_file(self, disks=None):
        """Prepare the base disks on the hosts and copy the SSH keys in them.

        Once the disks copy has ended, the copied files are renamed to
        ``/tmp/orig_<name>`` (only when the copy is a ``ParallelActions``).
        Each disk is then copied back to ``/tmp/<name>``, attached with
        qemu-nbd and mounted on ``/mnt`` so that root's ``authorized_keys``
        and ``id_rsa*`` files of the host can be added to it.

        :param disks: disks to prepare, ``self.backing_files`` is used when
          it is empty or None
        """
        if not self.copy_actions:
            self._start_disk_copy(disks)
        if not self.copy_actions.ended:
            logger.info("Waiting for the end of the disks copy")
            self.copy_actions.wait()
        if isinstance(self.copy_actions, ParallelActions):
            # Keep an untouched copy of every disk under /tmp/orig_<name>
            file_names = (action.local_files[0].split('/')[-1]
                          for action in self.copy_actions.actions)
            renames = [self.fact.get_remote(
                "mv /tmp/" + name + " /tmp/orig_" + name, self.hosts)
                for name in file_names]
            ParallelActions(renames).run()

        for backing_file in (disks if disks else self.backing_files):
            disk_name = backing_file.split('/')[-1]
            raw_disk = '/tmp/orig_' + disk_name
            to_disk = '/tmp/' + disk_name
            self.fact.get_remote('cp ' + raw_disk + ' ' + to_disk,
                                 self.hosts).run()
            logger.info('Copying ssh key on ' + to_disk + ' ...')
            # Attach the image as /dev/nbd0, mount its Linux partition,
            # add the keys, then unmount and detach.
            cmd = ''.join([
                'modprobe nbd max_part=16; ',
                'qemu-nbd --connect=/dev/nbd0 ', to_disk,
                ' ; sleep 3 ; partprobe /dev/nbd0 ; ',
                'part=`fdisk -l /dev/nbd0 |grep dev|grep Linux| grep -v swap|cut -f 1 -d " "` ; ',
                'mount $part /mnt ; mkdir -p /mnt/root/.ssh ; ',
                'cat /root/.ssh/authorized_keys >> /mnt/root/.ssh/authorized_keys ; ',
                'cp -r /root/.ssh/id_rsa* /mnt/root/.ssh/ ;',
                'umount /mnt; qemu-nbd -d /dev/nbd0'])
            logger.detail(cmd)
            copy_on_vm_base = self.fact.get_remote(cmd, self.hosts).run()
            self._actions_hosts(copy_on_vm_base)
예제 #33
0
파일: api_utils.py 프로젝트: mickours/execo
def __get_site(site):
    """Retrieve all the API data of a site with one thread per request.

    Threads are started for the site attributes, the site network and,
    for every cluster of the site, the cluster attributes and the hosts
    attributes. After joining them, their results are stored on the
    current thread as ``site_data``, ``network_data``, ``cluster_data``
    and ``host_data`` (the last two are dicts indexed by cluster).
    """
    logger.detail(site)
    site_worker = threading.Thread(target=__get_site_attrs, args=(site,))
    site_worker.start()
    network_worker = threading.Thread(target=__get_site_network,
                                      args=(site,))
    network_worker.start()
    cluster_workers = {}
    host_workers = {}
    for cluster in _get_site_clusters_uncached(site):
        worker = threading.Thread(target=__get_cluster_attrs,
                                  args=(site, cluster))
        worker.start()
        cluster_workers[cluster] = worker
        worker = threading.Thread(target=__get_host_attrs,
                                  args=(site, cluster))
        worker.start()
        host_workers[cluster] = worker
    workers = [site_worker, network_worker]
    workers.extend(cluster_workers.values())
    workers.extend(host_workers.values())
    for worker in workers:
        worker.join()
    # Each worker left its result as an attribute of its own thread object
    current = threading.currentThread()
    current.site_data = site_worker.site_data
    current.network_data = network_worker.network_data
    current.cluster_data = {}
    current.host_data = {}
    for cluster in cluster_workers:
        current.cluster_data[cluster] = cluster_workers[cluster].cluster_data
        current.host_data[cluster] = host_workers[cluster].host_data
예제 #34
0
파일: api_utils.py 프로젝트: mickours/execo
def _write_api_cache(cache_dir, data):
    """Dump the Grid'5000 API data into the cache directory.

    One file per key of ``data`` is written, plus an ``api_commit`` file
    holding the version of the first backbone network element.

    :param cache_dir: cache directory, created if missing; file names are
      appended to it as is, so it has to end with a path separator

    :param data: dict of the API data to store
    """
    if path.exists(cache_dir):
        logger.detail('Cache directory is present')
    else:
        makedirs(cache_dir)
        logger.detail('No cache found, directory created')

    logger.detail('Writing data to cache ...')
    for name in data:
        with open(cache_dir + name, 'wb') as cache_file:
            dump(data[name], cache_file)
    with open(cache_dir + 'api_commit', 'w') as commit_file:
        commit_file.write(data['network']['backbone'][0]['version'])
예제 #35
0
def _write_api_cache(cache_dir, data):
    """Dump the Grid'5000 API data into the cache directory.

    One file per key of ``data`` is written, plus an ``api_commit`` file
    holding the version of the first backbone network element.

    :param cache_dir: cache directory, created if missing; file names are
      appended to it as is, so it has to end with a path separator

    :param data: dict of the API data to store
    """
    if path.exists(cache_dir):
        logger.detail('Cache directory is present')
    else:
        makedirs(cache_dir)
        logger.detail('No cache found, directory created')

    logger.detail('Writing data to cache ...')
    for name in data:
        with open(cache_dir + name, 'wb') as cache_file:
            dump(data[name], cache_file)
    with open(cache_dir + 'api_commit', 'w') as commit_file:
        commit_file.write(data['network']['backbone'][0]['version'])
예제 #36
0
def __get_site_attrs(site):
    """Store the API attributes of ``site`` on the current thread.

    The result is available as the ``site_data`` attribute of the thread
    running this function.
    """
    logger.detail(site + " attrs")
    attrs = get_resource_attributes('sites/' + site)
    threading.currentThread().site_data = attrs
예제 #37
0
def __get_backbone():
    """Store the backbone network equipments on the current thread.

    The ``items`` of the ``/network_equipments`` resource are available
    as the ``backbone_data`` attribute of the thread running this function.
    """
    logger.detail("backbone network")
    equipments = get_resource_attributes('/network_equipments')
    threading.currentThread().backbone_data = equipments['items']
예제 #38
0
파일: actions.py 프로젝트: lpouillo/vm5k
def wait_vms_have_started(vms, restart=True):
    """Scan port 22 on all vms, distributed on hosts.

    The ips of the VMs are written to a temporary file pushed on every
    host, and each host runs nmap on its own slice of this file. The scan
    is repeated every 15 seconds; a scan that brings no newly started VM
    counts as a failed try, and the function gives up after 10 of them.

    :param vms: list of VM dicts; the keys ``ip``, ``host`` and ``state``
      are read, and ``state`` is set to ``'OK'`` when the VM answers

    :param restart: if True, the VMs still in state ``'KO'`` are given to
      ``restart_vms`` after a scan without progress

    :return: True if all the VMs have been started, False otherwise
    """
    if not vms:
        # Nothing to wait for, and no host to divide the scan between.
        return True
    # Creating file with list of VMs ip
    fd, tmpfile = tempfile.mkstemp(prefix='vmips')
    with fdopen(fd, 'w') as f:
        for vm in vms:
            f.write(vm['ip'] + '\n')
    remote_file = tmpfile.split('/')[-1]
    # getting the list of host
    hosts = sorted(set(vm['host'] for vm in vms))
    # Pushing file on all hosts
    TaktukPut(hosts, [tmpfile]).run()
    logger.debug(pformat(hosts))
    # Splitting nmap scan
    n_vm_scan = ceil(len(vms) / len(hosts)) + 1
    # NOTE(review): the '{{cmds}}' template below presumably makes execo
    # look up this local list by name (one command per host) -- do not
    # rename it without checking.
    cmds = []
    for i in range(len(hosts)):
        start = str(int(i * n_vm_scan))
        end = str(int((i + 1) * n_vm_scan))
        cmds.append("awk 'NR>=" + start + " && NR<" + end +
                    "' " + remote_file + " > nmap_file ; "
                    + "nmap -v -oG - -i nmap_file -p 22")
    logger.debug('%s', pformat(cmds))
    nmap = TaktukRemote('{{cmds}}', hosts)
    # First VM found for each ip, to avoid scanning the list on every line
    vm_by_ip = {}
    for vm in vms:
        vm_by_ip.setdefault(vm['ip'], vm)
    nmap_tries = 0
    all_up = False
    old_started = []
    while (not all_up) and nmap_tries < 10:
        sleep(15)
        logger.detail('nmap_tries %s', nmap_tries)
        nmap.run()
        for p in nmap.processes:
            for line in p.stdout.split('\n'):
                if 'Status' in line:
                    split_line = line.split(' ')
                    ip = split_line[1]
                    state = split_line[3].strip()
                    if state == 'Up' and ip in vm_by_ip:
                        vm_by_ip[ip]['state'] = 'OK'

        started_vms = [vm for vm in vms if vm['state'] == 'OK']
        all_up = len(started_vms) == len(vms)
        if started_vms != old_started:
            old_started = started_vms
        else:
            # No progress since the previous scan
            if restart:
                restart_vms([vm for vm in vms if vm['state'] == 'KO'])
            nmap_tries += 1
        if nmap_tries == 1:
            activate_vms([vm for vm in vms if vm['state'] == 'KO'])
        if not all_up:
            logger.info(str(nmap_tries) + ': ' + str(len(started_vms)) + '/' +
                        str(len(vms)))
        nmap.reset()

    TaktukRemote('rm ' + remote_file, hosts).run()
    Process('rm ' + tmpfile).run()
    if all_up:
        logger.info('All VM have been started')
        return True
    else:
        logger.error('All VM have not been started')
        return False
예제 #39
0
파일: api_utils.py 프로젝트: mickours/execo
def __get_site_network(site):
    """Store the network equipments of ``site`` on the current thread.

    The equipments are available, indexed by their ``uid``, as the
    ``network_data`` attribute of the thread running this function.
    """
    logger.detail(site + " network")
    current = threading.currentThread()
    current.network_data = {}
    equipments_path = 'sites/' + site + '/network_equipments'
    for equipment in get_resource_attributes(equipments_path)['items']:
        current.network_data[equipment['uid']] = equipment
예제 #40
0
파일: api_utils.py 프로젝트: mickours/execo
def __get_backbone():
    """Store the backbone network equipments on the current thread.

    The ``items`` of the ``/network_equipments`` resource are available
    as the ``backbone_data`` attribute of the thread running this function.
    """
    logger.detail("backbone network")
    equipments = get_resource_attributes('/network_equipments')
    threading.currentThread().backbone_data = equipments['items']
예제 #41
0
def __get_site_network(site):
    """Store the network equipments of ``site`` on the current thread.

    The equipments are available, indexed by their ``uid``, as the
    ``network_data`` attribute of the thread running this function.
    """
    logger.detail(site + " network")
    current = threading.currentThread()
    current.network_data = {}
    equipments_path = 'sites/' + site + '/network_equipments'
    for equipment in get_resource_attributes(equipments_path)['items']:
        current.network_data[equipment['uid']] = equipment
예제 #42
0
def __get_cluster_attrs(site, cluster):
    """Store the API attributes of ``cluster`` on the current thread.

    The result is available as the ``cluster_data`` attribute of the
    thread running this function.
    """
    logger.detail(cluster + " attrs")
    cluster_path = 'sites/' + site + '/clusters/' + cluster
    attrs = get_resource_attributes(cluster_path)
    threading.currentThread().cluster_data = attrs
예제 #43
0
파일: api_utils.py 프로젝트: mickours/execo
def __get_cluster_attrs(site, cluster):
    """Store the API attributes of ``cluster`` on the current thread.

    The result is available as the ``cluster_data`` attribute of the
    thread running this function.
    """
    logger.detail(cluster + " attrs")
    cluster_path = 'sites/' + site + '/clusters/' + cluster
    attrs = get_resource_attributes(cluster_path)
    threading.currentThread().cluster_data = attrs
예제 #44
0
파일: api_utils.py 프로젝트: mickours/execo
def __get_host_attrs(site, cluster):
    """Store the attributes of the nodes of ``cluster`` on the current thread.

    The nodes are available, indexed by their ``uid``, as the
    ``host_data`` attribute of the thread running this function.
    """
    logger.detail(cluster + " hosts")
    current = threading.currentThread()
    current.host_data = {}
    nodes_path = 'sites/' + site + '/clusters/' + cluster + '/nodes'
    for node in get_resource_attributes(nodes_path)['items']:
        current.host_data[node['uid']] = node
예제 #45
0
def __get_host_attrs(site, cluster):
    """Store the attributes of the nodes of ``cluster`` on the current thread.

    The nodes are available, indexed by their ``uid``, as the
    ``host_data`` attribute of the thread running this function.
    """
    logger.detail(cluster + " hosts")
    current = threading.currentThread()
    current.host_data = {}
    nodes_path = 'sites/' + site + '/clusters/' + cluster + '/nodes'
    for node in get_resource_attributes(nodes_path)['items']:
        current.host_data[node['uid']] = node
예제 #46
0
파일: api_utils.py 프로젝트: mickours/execo
def __get_site_attrs(site):
    """Store the API attributes of ``site`` on the current thread.

    The result is available as the ``site_data`` attribute of the thread
    running this function.
    """
    logger.detail(site + " attrs")
    attrs = get_resource_attributes('sites/' + site)
    threading.currentThread().site_data = attrs
예제 #47
0
파일: planning.py 프로젝트: msimonin/execo
def _get_site_planning_API(site, site_planning, ignore_besteffort):
    """Fill ``site_planning`` with the busy slots of ``site`` from the API.

    Alive nodes get an empty ``{'busy': [], 'free': []}`` entry under their
    cluster, vlans are reset from ``_get_vlans_API`` when requested, then
    the waiting, launching and running jobs are fetched (one thread per
    job) and their ``(start_time, end_time)`` slot is appended to the
    ``busy`` list of every node, vlan and subnet they use.

    :param site: name of the site

    :param site_planning: dict updated in place; only the clusters and the
      ``vlans`` / ``subnets`` keys already present in it are filled

    :param ignore_besteffort: when not False, jobs of the ``besteffort``
      queue are ignored

    Nothing is returned. Any error is logged and reported by setting
    ``broken = True`` on the current thread.
    """
    try:
        alive_nodes = set([
            str(node['network_address']) for node in get_resource_attributes(
                '/sites/' + site +
                '/internal/oarapi/resources/details.json?limit=2^30')['items']
            if node['type'] == 'default' and node['state'] != 'Dead'
            and node['maintenance'] != 'YES'
        ])

        for host in alive_nodes:
            host_cluster = get_host_cluster(str(host))
            if host_cluster in site_planning:
                site_planning[host_cluster][host] = {'busy': [], 'free': []}
        if 'vlans' in site_planning:
            site_planning['vlans'] = {}
            for vlan in _get_vlans_API(site):
                site_planning['vlans'][vlan] = {'busy': [], 'free': []}
        # STORAGE AND SUBNETS MISSING
        # Retrieving jobs

        site_jobs = get_resource_attributes(
            '/sites/' + site +
            '/jobs?limit=1073741824&state=waiting,launching,running')['items']
        jobs_links = [
            link['href'] for job in site_jobs for link in job['links']
            if link['rel'] == 'self' and
            (ignore_besteffort == False or job['queue'] != 'besteffort')
        ]
        threads = []
        for link in jobs_links:
            t = Thread(target=_get_job_link_attr_API,
                       args=('/' + str(link).split('/', 2)[2], ))
            # Result slots set before the thread starts
            t.broken = False
            t.attr = None
            t.ex = None
            threads.append(t)
            t.start()
        for t in threads:
            t.join()
            if t.broken:
                raise t.ex
            attr = t.attr
            try:
                start_time = attr['started_at'] \
                    if attr['started_at'] != 0 else attr['scheduled_at']
                end_time = start_time + attr['walltime']
            except Exception:
                # The job has no usable time information: skip it, but do
                # not swallow KeyboardInterrupt / SystemExit as a bare
                # except would.
                continue
            start_time, end_time = _fix_job(start_time, end_time)
            nodes = attr['assigned_nodes']
            for node in nodes:
                cluster = node.split('.', 1)[0].split('-')[0]
                if cluster in site_planning and node in site_planning[cluster]:
                    site_planning[cluster][node]['busy'].append(
                        (start_time, end_time))
            if 'vlans' in site_planning \
                    and 'vlans' in attr['resources_by_type'] \
                    and int(attr['resources_by_type']['vlans'][0]) > 3:
                # Only the vlans with an id greater than 3 are recorded
                kavname = 'kavlan-' + str(
                    attr['resources_by_type']['vlans'][0])
                site_planning['vlans'][kavname]['busy'].append(
                    (start_time, end_time))
            if 'subnets' in site_planning \
                    and 'subnets' in attr['resources_by_type']:
                for subnet in attr['resources_by_type']['subnets']:
                    if subnet not in site_planning['subnets']:
                        site_planning['subnets'][subnet] = {
                            'busy': [],
                            'free': []
                        }
                    site_planning['subnets'][subnet]['busy'].append(
                        (start_time, end_time))
            # STORAGE IS MISSING
    except Exception:
        logger.warn(
            'error connecting to oar database / getting planning from ' + site)
        logger.detail("exception:\n" + format_exc())
        currentThread().broken = True
예제 #48
0
파일: actions.py 프로젝트: sphilippot/vm5k
def wait_vms_have_started(vms, restart=True):
    """Scan port 22 on all vms, distributed on hosts.

    The ips of the VMs are written to a temporary file pushed on every
    host, and each host runs nmap on its own slice of this file. The scan
    is repeated every 15 seconds; a scan that brings no newly started VM
    counts as a failed try, and the function gives up after 10 of them.

    :param vms: list of VM dicts; the keys ``ip``, ``host`` and ``state``
      are read, and ``state`` is set to ``'OK'`` when the VM answers

    :param restart: if True, the VMs still in state ``'KO'`` are given to
      ``restart_vms`` after a scan without progress

    :return: True if all the VMs have been started, False otherwise
    """
    if not vms:
        # Nothing to wait for, and no host to divide the scan between.
        return True
    # Creating file with list of VMs ip
    fd, tmpfile = tempfile.mkstemp(prefix='vmips')
    with fdopen(fd, 'w') as f:
        for vm in vms:
            f.write(vm['ip'] + '\n')
    remote_file = tmpfile.split('/')[-1]
    # getting the list of host
    hosts = sorted(set(vm['host'] for vm in vms))
    # Pushing file on all hosts
    TaktukPut(hosts, [tmpfile]).run()
    logger.debug(pformat(hosts))
    # Splitting nmap scan
    n_vm_scan = ceil(len(vms) / len(hosts)) + 1
    # NOTE(review): the '{{cmds}}' template below presumably makes execo
    # look up this local list by name (one command per host) -- do not
    # rename it without checking.
    cmds = []
    for i in range(len(hosts)):
        start = str(int(i * n_vm_scan))
        end = str(int((i + 1) * n_vm_scan))
        cmds.append("awk 'NR>=" + start + " && NR<" + end + "' " +
                    remote_file + " > nmap_file ; " +
                    "nmap -v -oG - -i nmap_file -p 22")
    logger.debug('%s', pformat(cmds))
    nmap = TaktukRemote('{{cmds}}', hosts)
    # First VM found for each ip, to avoid scanning the list on every line
    vm_by_ip = {}
    for vm in vms:
        vm_by_ip.setdefault(vm['ip'], vm)
    nmap_tries = 0
    all_up = False
    old_started = []
    while (not all_up) and nmap_tries < 10:
        sleep(15)
        logger.detail('nmap_tries %s', nmap_tries)
        nmap.run()
        for p in nmap.processes:
            for line in p.stdout.split('\n'):
                if 'Status' in line:
                    split_line = line.split(' ')
                    ip = split_line[1]
                    state = split_line[3].strip()
                    if state == 'Up' and ip in vm_by_ip:
                        vm_by_ip[ip]['state'] = 'OK'

        started_vms = [vm for vm in vms if vm['state'] == 'OK']
        all_up = len(started_vms) == len(vms)
        if started_vms != old_started:
            old_started = started_vms
        else:
            # No progress since the previous scan
            if restart:
                restart_vms([vm for vm in vms if vm['state'] == 'KO'])
            nmap_tries += 1
        if nmap_tries == 1:
            activate_vms([vm for vm in vms if vm['state'] == 'KO'])
        if not all_up:
            logger.info(
                str(nmap_tries) + ': ' + str(len(started_vms)) + '/' +
                str(len(vms)))
        nmap.reset()

    TaktukRemote('rm ' + remote_file, hosts).run()
    Process('rm ' + tmpfile).run()
    if all_up:
        logger.info('All VM have been started')
        return True
    else:
        logger.error('All VM have not been started')
        return False
예제 #49
0
파일: planning.py 프로젝트: msimonin/execo
def _get_site_planning_PGSQL(site, site_planning, ignore_besteffort):
    """Fill ``site_planning`` with the busy slots of ``site`` from the OAR
    PostgreSQL database.

    The alive resources are read first to create the empty
    ``{'busy': [], 'free': []}`` entries (hosts of the clusters present in
    ``site_planning``, vlans, subnets), then the launching, running and
    waiting jobs are read and their ``(start_time, end_time)`` slot is
    appended to the ``busy`` list of the resources they use.

    :param site: name of the site, its database is reached through a
      ``G5kAutoPortForwarder``

    :param site_planning: dict updated in place; only the clusters and the
      ``vlans`` / ``subnets`` keys already present in it are filled

    :param ignore_besteffort: if true, jobs of the ``besteffort`` queue
      are excluded from the SQL query

    Nothing is returned. Any error is logged and reported by setting
    ``broken = True`` on the current thread.
    """
    try:
        with G5kAutoPortForwarder(
                site, 'oardb.' + site + '.grid5000.fr',
                g5k_configuration['oar_pgsql_ro_port']) as (host, port):
            conn = psycopg2.connect(
                host=host,
                port=port,
                user=g5k_configuration['oar_pgsql_ro_user'],
                password=g5k_configuration['oar_pgsql_ro_password'],
                database=g5k_configuration['oar_pgsql_ro_db'])
            try:
                cur = conn.cursor()
                # Retrieving alive resources
                sql = """SELECT DISTINCT R.type, R.network_address, R.vlan, R.subnet_address
                    FROM resources R
                    WHERE state <> 'Dead' AND R.maintenance <> 'YES';"""

                cur.execute(sql)

                for data in cur.fetchall():
                    if data[0] == "default":
                        cluster = get_host_cluster(data[1])
                        if cluster in site_planning:
                            site_planning[cluster][data[1]] = {
                                'busy': [],
                                'free': []
                            }
                    if data[0] in ['kavlan', 'kavlan-global'] \
                        and 'vlans' in site_planning:
                        site_planning['vlans']['kavlan-' + data[2]] = {
                            'busy': [],
                            'free': []
                        }
                    # The planning key is 'subnets': testing 'subnet' never
                    # matched the key written to below.
                    if data[0] == "subnet" and 'subnets' in site_planning:
                        site_planning['subnets'][data[3]] = {
                            'busy': [],
                            'free': []
                        }

                sql = (
                    """SELECT J.job_id, J.state, GJP.start_time AS start_time,
                GJP.start_time+MJD.moldable_walltime,
                array_agg(DISTINCT R.network_address) AS hosts,
                array_agg(DISTINCT R.vlan) AS vlan,
                array_agg(DISTINCT R.subnet_address) AS subnets
                FROM jobs J
                LEFT JOIN moldable_job_descriptions MJD
                    ON MJD.moldable_job_id=J.job_id
                LEFT JOIN gantt_jobs_predictions GJP
                    ON GJP.moldable_job_id=MJD.moldable_id
                INNER JOIN gantt_jobs_resources AR
                    ON AR.moldable_job_id=MJD.moldable_id
                LEFT JOIN resources R
                    ON AR.resource_id=R.resource_id
                WHERE ( J.state='Launching' OR J.state='Running' OR J.state='Waiting')
                """ + (""" AND queue_name<>'besteffort'"""
                       if ignore_besteffort else """""") +
                    """GROUP BY J.job_id, GJP.start_time, MJD.moldable_walltime
                ORDER BY J.start_time""")

                #                    CONVERT(SUBSTRING_INDEX(SUBSTRING_INDEX(R.network_address,'.',1),'-',-1), SIGNED)"""
                cur.execute(sql)

                # Columns: 2 = start, 3 = end, 4 = hosts, 5 = vlans,
                # 6 = subnets
                for job in cur.fetchall():
                    start_time = job[2]
                    end_time = job[3]
                    start_time, end_time = _fix_job(start_time, end_time)
                    if len(job[4]) > 0:
                        for host in job[4]:
                            if host != '':
                                cluster = get_host_cluster(host)
                                if cluster in site_planning:
                                    if host in site_planning[cluster]:
                                        site_planning[cluster][host][
                                            'busy'].append(
                                                (start_time, end_time))
                    if job[5][0] and 'vlans' in site_planning:
                        for vlan in job[5]:
                            if isinstance(vlan, str) and int(vlan) > 3:
                                # only routed vlan
                                site_planning['vlans']['kavlan-' +
                                                       vlan]['busy'].append(
                                                           (start_time,
                                                            end_time))

                    if 'subnets' in site_planning:
                        for subnet in job[6]:
                            # The aggregated array may hold empty values
                            # for resources that are not subnets.
                            if not subnet:
                                continue
                            # A subnet missing from the alive resources
                            # gets its entry created on the fly, as the
                            # API based variant does.
                            slots = site_planning['subnets'].setdefault(
                                subnet, {'busy': [], 'free': []})
                            slots['busy'].append((start_time, end_time))
            finally:
                conn.close()
    except Exception:
        logger.warn(
            'error connecting to oar database / getting planning from ' + site)
        logger.detail("exception:\n" + format_exc())
        currentThread().broken = True
예제 #50
0
# Build one dict per <vm> element found under each <host> element of the
# ``state`` element tree; 'id' comes from the vm element and 'host' from the
# id of the enclosing host element.
# NOTE(review): _default_xml_value only receives the field name, so it
# presumably reads the current ``vm`` element from the enclosing scope --
# confirm against its definition.
vms = []
for host in state.findall('.//host'):
    for vm in host.findall('.//vm'):
        vms.append({'id': vm.get('id'),
            'n_cpu': int(_default_xml_value('n_cpu')),
            'cpuset': _default_xml_value('cpuset'),
            'mem': int(_default_xml_value('mem')),
            'hdd': int(_default_xml_value('hdd')),
            'backing_file': _default_xml_value('backing_file'),
            'ip': _default_xml_value('ip'),
            'mac': _default_xml_value('mac'),
            'host': host.get('id')})

while True:
    logger.detail('Cleaning all VMS from XML file')
    for el_host in state.findall('.//host'):
        for vm in el_host.findall('./vm'):
            el_host.remove(vm)
    logger.info('Retrieving VMS position and load')
    get_vms_load = TaktukRemote("get_cpu_consumptions.sh",
       hosts).run()
    vms_loads = {}
    hosts_vms = {host: [] for host in hosts}    
    for p in get_vms_load.processes:
        for line in p.stdout.strip().split('\n'):
            logger.detail(p.host.address)
            tmp_load = line.split(' ')
            logger.detail(tmp_load)
            try:
                vms_loads[tmp_load[0]] = float(tmp_load[1]) + float(tmp_load[2]) + float(tmp_load[-1])