Exemple #1
0
    def is_okay(self):
        """Report whether the network is created and, if required, has DHCP.

        Returns:
            bool: False if the network is not created, or if it provides
            DHCP, this host is the network node and dnsmasq is not running.
            True in every other case.
        """
        # TODO(andy):This will be built upon with further code re-design
        if not self.is_created():
            return False

        # dnsmasq only matters for DHCP networks, and only on the network
        # node. The short-circuit order matches the checks above.
        needs_dnsmasq = (self.db_entry['provide_dhcp']
                         and util.is_network_node())
        return not (needs_dnsmasq and not self.is_dnsmasq_running())
Exemple #2
0
    def run(self):
        """Loop forever, processing work items and maintaining networks.

        On the network node each pass processes network node work items. On
        other nodes the loop sleeps until the maintenance interval has
        elapsed. Networks are maintained whenever more than 30 seconds have
        passed since the previous maintenance run.
        """
        LOG.info('Starting')
        maintenance_interval = 30
        previous_maintenance = 0

        while True:
            if not util.is_network_node():
                # Nothing else to do here, so wait out the remainder of the
                # maintenance interval.
                elapsed = time.time() - previous_maintenance
                time.sleep(max(0, maintenance_interval - elapsed))
            else:
                self._process_network_node_workitems()

            if time.time() - previous_maintenance > maintenance_interval:
                self._maintain_networks()
                previous_maintenance = time.time()
Exemple #3
0
 def remove_dhcp(self):
     """Remove the DHCP daemon for this network.

     On the network node the removal is performed directly while holding the
     network lock. On any other node a 'remove_dhcp' work item is queued for
     the network node and an 'enqueued' event is recorded.
     """
     if not util.is_network_node():
         work_item = {
             'type': 'remove_dhcp',
             'network_uuid': self.uuid
         }
         db.enqueue('networknode', work_item)
         db.add_event('network', self.uuid, 'remove dhcp', 'enqueued', None,
                      None)
         return

     subst = self.subst_dict()
     with util.RecordedOperation('remove dhcp', self), \
             db.get_lock('network', None, self.uuid, ttl=120):
         dhcp_server = dhcp.DHCP(self.uuid, subst['vx_veth_inner'])
         dhcp_server.remove_dhcpd()
Exemple #4
0
 def remove_dhcp(self):
     """Remove the DHCP daemon for this network.

     On the network node the removal is performed directly while holding the
     object lock. On any other node a RemoveDHCPNetworkTask is queued for the
     network node and an 'enqueued' event is recorded.
     """
     if not util.is_network_node():
         db.enqueue('networknode',
                    RemoveDHCPNetworkTask(self.db_entry['uuid']))
         db.add_event('network', self.db_entry['uuid'], 'remove dhcp',
                      'enqueued', None, None)
         return

     subst = self.subst_dict()
     with util.RecordedOperation('remove dhcp', self), \
             db.get_object_lock(self, ttl=120, op='Network remove DHCP'):
         dhcp_server = dhcp.DHCP(self, subst['vx_veth_inner'])
         dhcp_server.remove_dhcpd()
Exemple #5
0
    def update_dhcp(self):
        """Restart the DHCP daemon for this network, if it provides DHCP.

        Does nothing when the network does not provide DHCP. On the network
        node the daemon is restarted while holding the object lock. On any
        other node an UpdateDHCPNetworkTask is queued for the network node
        and an 'enqueued' event is recorded.
        """
        if not self.db_entry['provide_dhcp']:
            return

        if not util.is_network_node():
            db.enqueue('networknode',
                       UpdateDHCPNetworkTask(self.db_entry['uuid']))
            db.add_event('network', self.db_entry['uuid'], 'update dhcp',
                         'enqueued', None, None)
            return

        subst = self.subst_dict()
        with util.RecordedOperation('update dhcp', self), \
                db.get_object_lock(self, ttl=120, op='Network update DHCP'):
            dhcp_server = dhcp.DHCP(self, subst['vx_veth_inner'])
            dhcp_server.restart_dhcpd()
Exemple #6
0
    def update_dhcp(self):
        """Restart the DHCP daemon for this network, if it provides DHCP.

        Does nothing when the network does not provide DHCP. On the network
        node the mesh is ensured first, then the daemon is restarted while
        holding the network lock. On any other node an 'update_dhcp' work
        item is queued for the network node and an 'enqueued' event is
        recorded.
        """
        if not self.provide_dhcp:
            return

        if not util.is_network_node():
            work_item = {
                'type': 'update_dhcp',
                'network_uuid': self.uuid
            }
            db.enqueue('networknode', work_item)
            db.add_event('network', self.uuid, 'update dhcp', 'enqueued', None,
                         None)
            return

        self.ensure_mesh()
        subst = self.subst_dict()
        with util.RecordedOperation('update dhcp', self), \
                db.get_lock('network', None, self.uuid, ttl=120):
            dhcp_server = dhcp.DHCP(self.uuid, subst['vx_veth_inner'])
            dhcp_server.restart_dhcpd()
Exemple #7
0
    def delete(self):
        """Tear down this network's local interfaces under the object lock.

        Every node deletes the vxlan bridge and vxlan interface when present.
        The network node additionally deletes the router and physical veths,
        the network namespace, and releases the floating gateway address
        (when one is set) back to the 'floating' IP manager.
        """
        subst = self.subst_dict()
        LOG.withFields(subst).debug('net.delete()')

        def _delete_link(interface_key, operation, command):
            # Delete an interface only if it currently exists, recording the
            # operation against this network.
            if util.check_for_interface(subst[interface_key]):
                with util.RecordedOperation(operation, self):
                    util.execute(None, command % subst)

        with db.get_object_lock(self, ttl=120, op='Network delete'):
            # Cleanup local node
            _delete_link('vx_bridge', 'delete vxlan bridge',
                         'ip link delete %(vx_bridge)s')
            _delete_link('vx_interface', 'delete vxlan interface',
                         'ip link delete %(vx_interface)s')

            # Only the network node has additional cleanup to do
            if not util.is_network_node():
                return

            _delete_link('vx_veth_outer', 'delete router veth',
                         'ip link delete %(vx_veth_outer)s')
            _delete_link('physical_veth_outer', 'delete physical veth',
                         'ip link delete %(physical_veth_outer)s')

            if os.path.exists('/var/run/netns/%(netns)s' % subst):
                with util.RecordedOperation('delete netns', self):
                    util.execute(None, 'ip netns del %(netns)s' % subst)

            if not self.db_entry['floating_gateway']:
                return

            with db.get_lock('ipmanager', None, 'floating', ttl=120,
                             op='Network delete'):
                floating_ipm = db.get_ipmanager('floating')
                floating_ipm.release(self.db_entry['floating_gateway'])
                db.persist_ipmanager('floating', floating_ipm.save())
Exemple #8
0
    def create(self):
        """Create this network's interfaces on the local node.

        Under the object lock, the vxlan interface and vxlan bridge are
        created if they do not already exist. On the network node the network
        namespace, router veth and physical veth are then created as needed,
        followed by NAT deployment and a DHCP update. On any other node a
        DeployNetworkTask is queued for the network node instead.

        Raises:
            DeadNetwork: if is_dead() reports true once the lock is held.
        """
        subst = self.subst_dict()

        with db.get_object_lock(self, ttl=120, op='Network create'):
            # Ensure network was not deleted whilst waiting for the lock.
            if self.is_dead():
                raise DeadNetwork('network=%s' % self)

            # Each step below is skipped when its interface already exists,
            # so calling create() again only fills in what is missing.
            if not util.check_for_interface(subst['vx_interface']):
                with util.RecordedOperation('create vxlan interface', self):
                    util.create_interface(
                        subst['vx_interface'], 'vxlan',
                        'id %(vx_id)s dev %(physical_interface)s dstport 0' %
                        subst)
                    util.execute(
                        None, 'sysctl -w net.ipv4.conf.'
                        '%(vx_interface)s.arp_notify=1' % subst)

            if not util.check_for_interface(subst['vx_bridge']):
                with util.RecordedOperation('create vxlan bridge', self):
                    # Create the bridge, enslave the vxlan interface to it,
                    # bring both up, then configure the bridge.
                    util.create_interface(subst['vx_bridge'], 'bridge', '')
                    util.execute(
                        None, 'ip link set %(vx_interface)s '
                        'master %(vx_bridge)s' % subst)
                    util.execute(None,
                                 'ip link set %(vx_interface)s up' % subst)
                    util.execute(None, 'ip link set %(vx_bridge)s up' % subst)
                    util.execute(
                        None, 'sysctl -w net.ipv4.conf.'
                        '%(vx_bridge)s.arp_notify=1' % subst)
                    util.execute(None, 'brctl setfd %(vx_bridge)s 0' % subst)
                    util.execute(None, 'brctl stp %(vx_bridge)s off' % subst)
                    util.execute(None,
                                 'brctl setageing %(vx_bridge)s 0' % subst)

        # NOTE: the network node steps below run after the object lock has
        # been released.
        if util.is_network_node():
            if not os.path.exists('/var/run/netns/%(netns)s' % subst):
                with util.RecordedOperation('create netns', self):
                    util.execute(None, 'ip netns add %(netns)s' % subst)

            if not util.check_for_interface(subst['vx_veth_outer']):
                with util.RecordedOperation('create router veth', self):
                    # veth pair: the outer end joins the vxlan bridge, the
                    # inner end moves into the namespace and is given the
                    # router address.
                    util.create_interface(
                        subst['vx_veth_outer'], 'veth',
                        'peer name %(vx_veth_inner)s' % subst)
                    util.execute(
                        None, 'ip link set %(vx_veth_inner)s netns %(netns)s' %
                        subst)
                    util.execute(
                        None,
                        'brctl addif %(vx_bridge)s %(vx_veth_outer)s' % subst)
                    util.execute(None,
                                 'ip link set %(vx_veth_outer)s up' % subst)
                    util.execute(
                        None, '%(in_netns)s ip link set %(vx_veth_inner)s up' %
                        subst)
                    util.execute(
                        None,
                        '%(in_netns)s ip addr add %(router)s/%(netmask)s '
                        'dev %(vx_veth_inner)s' % subst)

            if not util.check_for_interface(subst['physical_veth_outer']):
                with util.RecordedOperation('create physical veth', self):
                    # veth pair: the outer end joins the physical bridge, the
                    # inner end moves into the namespace.
                    util.create_interface(
                        subst['physical_veth_outer'], 'veth',
                        'peer name %(physical_veth_inner)s' % subst)
                    util.execute(
                        None, 'brctl addif %(physical_bridge)s '
                        '%(physical_veth_outer)s' % subst)
                    util.execute(
                        None, 'ip link set %(physical_veth_outer)s up' % subst)
                    util.execute(
                        None, 'ip link set %(physical_veth_inner)s '
                        'netns %(netns)s' % subst)

            self.deploy_nat()
            self.update_dhcp()
        else:
            # Not the network node: ask the network node to deploy its side.
            db.enqueue('networknode', DeployNetworkTask(self.db_entry['uuid']))
            db.add_event('network', self.db_entry['uuid'], 'deploy',
                         'enqueued', None, None)
Exemple #9
0
def main():
    """Run the main process: start the daemons and keep them alive.

    Logs the configuration, clears stale locks, checks this node in and
    restarts its queues. It then starts the 'resources' daemon, performs
    network node bootstrap (floating network record and physical bridge) when
    this host is the network node, starts the remaining daemons and restores
    instances. After that it loops forever: every ten seconds it reaps dead
    children, restarts any missing daemons and checks the node in again.
    """
    global DAEMON_IMPLEMENTATIONS
    global DAEMON_PIDS

    setproctitle.setproctitle(daemon.process_name('main'))

    # Log configuration on startup
    for key, value in config.dict().items():
        LOG.info('Configuration item %s = %s' % (key, value))

    daemon.set_log_level(LOG, 'main')

    # Check in early and often, also reset processing queue items
    db.clear_stale_locks()
    db.see_this_node()
    db.restart_queues()

    def _start_daemon(d):
        # Fork a child to run the named daemon and remember its pid.
        pid = os.fork()
        if pid == 0:
            DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
        DAEMON_PIDS[pid] = d
        LOG.withField('pid', pid).info('Started %s' % d)

    # Resource usage publisher, we need this early because scheduling decisions
    # might happen quite early on.
    _start_daemon('resources')

    # If I am the network node, I need some setup
    if util.is_network_node():
        # Bootstrap the floating network in the Networks table
        floating_network = db.get_network('floating')
        if not floating_network:
            db.create_floating_network(config.get('FLOATING_NETWORK'))
            floating_network = net.from_db('floating')

        subst = {
            'physical_bridge':
            util.get_safe_interface_name('phy-br-%s' %
                                         config.get('NODE_EGRESS_NIC')),
            'physical_nic':
            config.get('NODE_EGRESS_NIC')
        }

        if not util.check_for_interface(subst['physical_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical bridge
            # is considered outside the scope of the orchestration software as
            # it will cause the node to lose network connectivity. So instead
            # all we do is create a bridge if it doesn't exist and the wire
            # everything up to it. We can do egress NAT in that state, even if
            # floating IPs don't work.
            with util.RecordedOperation('create physical bridge', None):
                # No locking as read only
                ipm = db.get_ipmanager('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                util.create_interface(subst['physical_bridge'], 'bridge', '')
                util.execute(None,
                             'ip link set %(physical_bridge)s up' % subst)
                util.execute(
                    None, 'ip addr add %(master_float)s/%(netmask)s '
                    'dev %(physical_bridge)s' % subst)

                util.execute(
                    None, 'iptables -A FORWARD -o %(physical_nic)s '
                    '-i %(physical_bridge)s -j ACCEPT' % subst)
                util.execute(
                    None, 'iptables -A FORWARD -i %(physical_nic)s '
                    '-o %(physical_bridge)s -j ACCEPT' % subst)
                util.execute(
                    None, 'iptables -t nat -A POSTROUTING '
                    '-o %(physical_nic)s -j MASQUERADE' % subst)

    def _audit_daemons():
        # Forget daemons whose process has vanished. We iterate over a
        # snapshot because DAEMON_PIDS is modified here and again by
        # _start_daemon() below; mutating a dict while iterating it raises
        # RuntimeError.
        for pid, name in list(DAEMON_PIDS.items()):
            if not psutil.pid_exists(pid):
                LOG.warning('%s pid is missing, restarting' % name)
                del DAEMON_PIDS[pid]

        # Start every daemon which does not have a tracked process. This
        # covers both never-started daemons and the ones pruned above.
        running_daemons = set(DAEMON_PIDS.values())
        for d in DAEMON_IMPLEMENTATIONS:
            if d not in running_daemons:
                _start_daemon(d)

    def _reap_child():
        # Return the pid of one exited child, or 0 if none has exited.
        # waitpid() raises ChildProcessError when there are no children left
        # at all; treat that as "nothing to reap" so the audit below can
        # restart the daemons.
        try:
            wpid, _ = os.waitpid(-1, os.WNOHANG)
        except ChildProcessError:
            return 0
        return wpid

    _audit_daemons()
    restore_instances()

    while True:
        time.sleep(10)

        wpid = _reap_child()
        while wpid != 0:
            LOG.warning('%s died (pid %d)' %
                        (DAEMON_PIDS.get(wpid, 'unknown'), wpid))
            # The pid may be one we never tracked (logged as 'unknown'
            # above), so do not assume it is present.
            DAEMON_PIDS.pop(wpid, None)
            wpid = _reap_child()

        _audit_daemons()
        db.see_this_node()
Exemple #10
0
def _get_stats():
    """Collect resource statistics for this node.

    Gathers CPU, load average, memory (psutil and libvirt), KSM, disk,
    network and per-instance consumption figures, plus queue lengths for this
    node and, on the network node, for the 'networknode' queue.

    Returns:
        dict: statistic name to value. Memory figures are in MiB; disk and
        network figures are in bytes.
    """
    libvirt = util.get_libvirt()
    retval = {}
    conn = libvirt.open(None)

    # CPU info
    present_cpus, _, available_cpus = conn.getCPUMap()
    retval.update({
        'cpu_max': present_cpus,
        'cpu_available': available_cpus,
    })

    retval['cpu_max_per_instance'] = conn.getMaxVcpus(None)

    # NOTE: per-core CPU stats (conn.getCPUStats) are deliberately not
    # collected, as that data is not currently used.

    try:
        load_1, load_5, load_15 = psutil.getloadavg()
        retval.update({
            'cpu_load_1': load_1,
            'cpu_load_5': load_5,
            'cpu_load_15': load_15,
        })
    except Exception as e:
        util.ignore_exception('load average', e)

    # System memory info, converting bytes to mb
    stats = psutil.virtual_memory()
    retval.update({
        'memory_max': stats.total // 1024 // 1024,
        'memory_available': stats.available // 1024 // 1024
    })

    # libvirt memory info, converting kb to mb
    memory_status = conn.getMemoryStats(
        libvirt.VIR_NODE_MEMORY_STATS_ALL_CELLS)
    retval.update({
        'memory_max_libvirt': memory_status['total'] // 1024,
        'memory_available_libvirt': memory_status['free'] // 1024,
    })

    # Kernel Shared Memory (KSM) information. Not every entry is guaranteed
    # to be a readable file holding a single integer, so skip the ones which
    # are not rather than aborting the whole statistics pass.
    ksm_details = {}
    for ent in os.listdir('/sys/kernel/mm/ksm'):
        try:
            with open('/sys/kernel/mm/ksm/%s' % ent) as f:
                ksm_details['memory_ksm_%s' % ent] = int(f.read().rstrip())
        except (OSError, ValueError) as e:
            LOG.debug('During resource calc ignored KSM entry %s: %s'
                      % (ent, e))
    retval.update(ksm_details)

    # Disk info
    s = os.statvfs(config.get('STORAGE_PATH'))
    disk_counters = psutil.disk_io_counters()
    retval.update({
        'disk_total': s.f_frsize * s.f_blocks,
        'disk_free': s.f_frsize * s.f_bavail,
        'disk_used': s.f_frsize * (s.f_blocks - s.f_bfree),
        'disk_read_bytes': disk_counters.read_bytes,
        'disk_write_bytes': disk_counters.write_bytes,
    })

    # Network info
    net_counters = psutil.net_io_counters()
    retval.update({
        'network_read_bytes': net_counters.bytes_recv,
        'network_write_bytes': net_counters.bytes_sent,
    })

    # Virtual machine consumption info
    total_instances = 0
    total_active_instances = 0
    total_instance_max_memory = 0
    total_instance_actual_memory = 0
    total_instance_vcpus = 0
    total_instance_cpu_time = 0

    for guest in conn.listAllDomains():
        # Only active guests are counted; a guest which errors while being
        # queried is treated as inactive.
        try:
            if guest.isActive() != 1:
                continue
            _, maxmem, mem, cpus, cpu_time = guest.info()
        except libvirt.libvirtError as e:
            LOG.debug('During resource calc ignored libvirt error: %s' % e)
            continue

        total_instances += 1
        total_active_instances += 1
        total_instance_max_memory += maxmem
        total_instance_actual_memory += mem
        total_instance_vcpus += cpus
        total_instance_cpu_time += cpu_time

    # Queue health statistics
    node_queue_processing, node_queue_waiting = db.get_queue_length(
        config.NODE_NAME)

    retval.update({
        'cpu_total_instance_vcpus':
        total_instance_vcpus,
        'cpu_total_instance_cpu_time':
        total_instance_cpu_time,
        'memory_total_instance_max':
        total_instance_max_memory // 1024,
        'memory_total_instance_actual':
        total_instance_actual_memory // 1024,
        'instances_total':
        total_instances,
        'instances_active':
        total_active_instances,
        'node_queue_processing':
        node_queue_processing,
        'node_queue_waiting':
        node_queue_waiting,
    })

    if util.is_network_node():
        network_queue_processing, network_queue_waiting = db.get_queue_length(
            'networknode')

        retval.update({
            'network_queue_processing': network_queue_processing,
            'network_queue_waiting': network_queue_waiting,
        })

    return retval
Exemple #11
0
def restart_queues():
    """Requeue work which was still being processed when we last stopped.

    The 'networknode' queue is restarted first when this host is the network
    node, followed by this node's own queue.
    """
    def _queues_to_restart():
        # Lazily yield the queue names so each is looked up only when its
        # turn comes.
        if util.is_network_node():
            yield 'networknode'
        yield config.NODE_NAME

    for queue_name in _queues_to_restart():
        _restart_queue(queue_name)
Exemple #12
0
 def test_is_network_node_no(self):
     # This host is expected not to be identified as the network node.
     is_network_node = util.is_network_node()
     self.assertFalse(is_network_node)
Exemple #13
0
    def create(self):
        """Create this network's interfaces on the local node.

        Under the network lock, the vxlan interface and vxlan bridge are
        created if they do not already exist. On the network node the network
        namespace, router veth and physical veth are then created as needed,
        followed by NAT deployment and a DHCP update. On any other node a
        'deploy' work item is queued for the network node instead.
        """
        subst = self.subst_dict()

        with db.get_lock('network', None, self.uuid, ttl=120):
            # Each step below is skipped when its interface already exists,
            # so calling create() again only fills in what is missing.
            if not util.check_for_interface(subst['vx_interface']):
                with util.RecordedOperation('create vxlan interface', self):
                    util.execute(
                        None,
                        'ip link add %(vx_interface)s type vxlan id %(vx_id)s '
                        'dev %(physical_interface)s dstport 0' % subst)
                    util.execute(
                        None,
                        'sysctl -w net.ipv4.conf.%(vx_interface)s.arp_notify=1'
                        % subst)

            if not util.check_for_interface(subst['vx_bridge']):
                with util.RecordedOperation('create vxlan bridge', self):
                    # Create the bridge, enslave the vxlan interface to it,
                    # bring both up, then configure the bridge.
                    util.execute(
                        None, 'ip link add %(vx_bridge)s type bridge' % subst)
                    util.execute(
                        None,
                        'ip link set %(vx_interface)s master %(vx_bridge)s' %
                        subst)
                    util.execute(None,
                                 'ip link set %(vx_interface)s up' % subst)
                    util.execute(None, 'ip link set %(vx_bridge)s up' % subst)
                    util.execute(
                        None,
                        'sysctl -w net.ipv4.conf.%(vx_bridge)s.arp_notify=1' %
                        subst)
                    util.execute(None, 'brctl setfd %(vx_bridge)s 0' % subst)
                    util.execute(None, 'brctl stp %(vx_bridge)s off' % subst)
                    util.execute(None,
                                 'brctl setageing %(vx_bridge)s 0' % subst)

        # NOTE: the network node steps below run after the network lock has
        # been released.
        if util.is_network_node():
            if not os.path.exists('/var/run/netns/%(netns)s' % subst):
                with util.RecordedOperation('create netns', self):
                    util.execute(None, 'ip netns add %(netns)s' % subst)

            if not util.check_for_interface(subst['vx_veth_outer']):
                with util.RecordedOperation('create router veth', self):
                    # veth pair: the outer end joins the vxlan bridge, the
                    # inner end moves into the namespace and is given the
                    # router address.
                    util.execute(
                        None,
                        'ip link add %(vx_veth_outer)s type veth peer name %(vx_veth_inner)s'
                        % subst)
                    util.execute(
                        None, 'ip link set %(vx_veth_inner)s netns %(netns)s' %
                        subst)
                    util.execute(
                        None,
                        'brctl addif %(vx_bridge)s %(vx_veth_outer)s' % subst)
                    util.execute(None,
                                 'ip link set %(vx_veth_outer)s up' % subst)
                    util.execute(
                        None, '%(in_netns)s ip link set %(vx_veth_inner)s up' %
                        subst)
                    util.execute(
                        None,
                        '%(in_netns)s ip addr add %(router)s/%(netmask)s dev %(vx_veth_inner)s'
                        % subst)

            if not util.check_for_interface(subst['physical_veth_outer']):
                with util.RecordedOperation('create physical veth', self):
                    # veth pair: the outer end joins the physical bridge, the
                    # inner end moves into the namespace.
                    util.execute(
                        None,
                        'ip link add %(physical_veth_outer)s type veth peer name '
                        '%(physical_veth_inner)s' % subst)
                    util.execute(
                        None,
                        'brctl addif %(physical_bridge)s %(physical_veth_outer)s'
                        % subst)
                    util.execute(
                        None, 'ip link set %(physical_veth_outer)s up' % subst)
                    util.execute(
                        None,
                        'ip link set %(physical_veth_inner)s netns %(netns)s' %
                        subst)

            self.deploy_nat()
            self.update_dhcp()
        else:
            # Not the network node: ask the network node to deploy its side.
            db.enqueue('networknode', {
                'type': 'deploy',
                'network_uuid': self.uuid
            })
            db.add_event('network', self.uuid, 'deploy', 'enqueued', None,
                         None)
Exemple #14
0
 def test_is_network_node_no(self):
     # Parse the configuration first, then expect this host not to be
     # identified as the network node.
     config.parsed.parse()
     is_network_node = util.is_network_node()
     self.assertFalse(is_network_node)
Exemple #15
0
 def test_is_network_node_yes(self):
     # Parse the configuration first, then expect this host to be
     # identified as the network node.
     config.parsed.parse()
     is_network_node = util.is_network_node()
     self.assertTrue(is_network_node)
Exemple #16
0
    def _maintain_networks(self):
        """Ensure this node is on every network it should be on.

        Ordinary nodes should be on the networks used by their own
        instances; the network node should be on all networks, and also
        hard deletes network interfaces whose instance is missing or in a
        'deleted', 'error' or 'unknown' state. Networks which are not okay
        are recreated and their mesh is ensured. VXLANs found on the host
        which do not belong to a maintained network are logged, and the
        discovered vxid mapping is persisted for this node.
        """
        LOG.info('Maintaining networks')

        # Discover what networks are present
        _, _, vxid_to_mac = util.discover_interfaces()

        # Determine what networks we should be on
        host_networks = []
        seen_vxids = []

        if not util.is_network_node():
            # For normal nodes, just the ones we have instances for
            for inst in list(db.get_instances(
                    only_node=config.parsed.get('NODE_NAME'))):
                for iface in db.get_instance_interfaces(inst['uuid']):
                    if iface['network_uuid'] not in host_networks:
                        host_networks.append(iface['network_uuid'])
        else:
            # For network nodes, its all networks
            for n in db.get_networks():
                host_networks.append(n['uuid'])

                # Network nodes also look for interfaces for absent instances
                # and delete them
                for ni in db.get_network_interfaces(n['uuid']):
                    inst = db.get_instance(ni['instance_uuid'])
                    if (not inst
                            or inst.get('state', 'unknown') in [
                                'deleted', 'error', 'unknown']):
                        db.hard_delete_network_interface(ni['uuid'])
                        LOG.withInstance(
                            ni['instance_uuid']).withNetworkInterface(
                            ni['uuid']).info(
                                'Hard deleted stray network interface')

        # Ensure we are on every network we have a host for
        for network in host_networks:
            try:
                n = net.from_db(network)
                if not n:
                    continue

                # The age of the last state change is "now minus then". The
                # reverse subtraction is always negative, which would skip
                # every network on every pass.
                if time.time() - n.db_entry['state_updated'] < 60:
                    # Network state changed in the last minute, punt for now
                    continue

                if not n.is_okay():
                    LOG.withObj(n).info('Recreating not okay network')
                    n.create()

                n.ensure_mesh()
                seen_vxids.append(n.vxlan_id)

            except exceptions.LockException as e:
                LOG.warning(
                    'Failed to acquire lock while maintaining networks: %s' % e)

        # Determine if there are any extra vxids
        extra_vxids = set(vxid_to_mac.keys()) - set(seen_vxids)

        # NOTE: extra vxlans are only logged here. Nothing in this method
        # removes them.
        if extra_vxids:
            LOG.withField('vxids', extra_vxids).warning(
                'Extra vxlans present!')

        # And record vxids in the database
        db.persist_node_vxid_mapping(
            config.parsed.get('NODE_NAME'), vxid_to_mac)
Exemple #17
0
 def test_is_network_node_yes(self):
     # This host is expected to be identified as the network node.
     is_network_node = util.is_network_node()
     self.assertTrue(is_network_node)