Exemple #1
0
 def test_from_db(self, mock_get, mock_time):
     """Check the in_use map restored by IPManager.from_db('floating').

     mock_get and mock_time are unused in the body; presumably they are
     injected by patch decorators that are not visible here.
     """
     ipm = IPManager.from_db('floating')

     # Every expected reservation has the same owner and timestamp, so the
     # expectation is generated from the list of addresses.
     reserved_addresses = (
         '192.168.20.0',
         '192.168.20.1',
         '192.168.20.101',
         '192.168.20.255',
         '192.168.20.75',
     )
     expected_in_use = {
         address: {
             'user': ('ipmanager', 'uuid'),
             'when': 1632261535.027476
         }
         for address in reserved_addresses
     }

     self.assertEqual(expected_in_use, ipm.in_use)
Exemple #2
0
    def delete_on_network_node(self):
        """Tear down this network on the network node and mark it deleted.

        Under the network's own lock this removes the router and egress veth
        interfaces and the network namespace (each only if present), releases
        the floating gateway address back to the floating IP manager, and
        sets the state to STATE_DELETED. Afterwards every node returned by
        Nodes([active_nodes]) is asked to destroy the network, DHCP and NAT
        are removed, and the network's own IP manager is deleted.
        """
        # (subst key naming the interface, operation label, command template)
        veth_teardown = (
            ('vx_veth_outer', 'delete router veth',
             'ip link delete %(vx_veth_outer)s'),
            ('egress_veth_outer', 'delete egress veth',
             'ip link delete %(egress_veth_outer)s'),
        )

        with self.get_lock(op='Network delete'):
            subst = self.subst_dict()

            for iface_key, description, command in veth_teardown:
                if not util_network.check_for_interface(subst[iface_key]):
                    continue
                with util_general.RecordedOperation(description, self):
                    util_process.execute(None, command % subst)

            netns_path = '/var/run/netns/%s' % self.uuid
            if os.path.exists(netns_path):
                with util_general.RecordedOperation('delete netns', self):
                    util_process.execute(
                        None, 'ip netns del %s' % self.uuid)

            if self.floating_gateway:
                floating_lock = db.get_lock(
                    'ipmanager', None, 'floating', ttl=120,
                    op='Network delete')
                with floating_lock:
                    floating_ipm = IPManager.from_db('floating')
                    floating_ipm.release(self.floating_gateway)
                    floating_ipm.persist()
                    self.update_floating_gateway(None)

            self.state = self.STATE_DELETED

        # Ensure that all hypervisors remove this network. This is really
        # just catching strays, apart from on the network node where we
        # absolutely need to do this thing.
        for hyp in Nodes([active_nodes]):
            destroy_request = {
                'tasks': [HypervisorDestroyNetworkTask(self.uuid)]
            }
            etcd.enqueue(hyp.uuid, destroy_request)

        self.remove_dhcp()
        self.remove_nat()

        # The per-network address allocations are no longer needed.
        IPManager.from_db(self.uuid).delete()
Exemple #3
0
    def remove_nat(self):
        """Release this network's floating gateway, or delegate that work.

        On the network node the floating gateway (if one is set) is released
        from the floating IP manager under the floating ipmanager lock and
        then cleared on this network. On any other node a
        RemoveNATNetworkTask is queued for the network node instead.
        """
        if not config.NODE_IS_NETWORK_NODE:
            # NOTE(review): the task is enqueued bare here, whereas other
            # callers in this codebase wrap tasks as {'tasks': [...]} --
            # confirm which payload shape etcd.enqueue expects.
            etcd.enqueue('networknode', RemoveNATNetworkTask(self.uuid))
            return

        if not self.floating_gateway:
            return

        floating_lock = db.get_lock(
            'ipmanager', None, 'floating', ttl=120, op='Remove NAT')
        with floating_lock:
            floating_ipm = IPManager.from_db('floating')
            floating_ipm.release(self.floating_gateway)
            floating_ipm.persist()
            self.update_floating_gateway(None)
Exemple #4
0
def assign_floating_ip(ni):
    """Allocate a floating address and record it on interface ``ni``.

    Returns an api_base 404 error if the 'floating' network cannot be loaded.
    Otherwise a 'float' event is recorded against the interface, a random
    free address is reserved (labelled with ni.unique_label()) while the
    floating ipmanager lock is held, and the address is assigned to
    ni.floating.
    """
    float_net = net.Network.from_db('floating')
    if not float_net:
        return api_base.error(404, 'floating network not found')

    db.add_event('interface', ni.uuid, 'api', 'float', None, None)

    # The address is reserved and stored on the interface record now so
    # that the job which runs later can read it from there.
    floating_lock = db.get_lock(
        'ipmanager', None, 'floating', ttl=120, op='Interface float')
    with floating_lock:
        floating_ipm = IPManager.from_db('floating')
        allocated = floating_ipm.get_random_free_address(ni.unique_label())
        floating_ipm.persist()

    ni.floating = allocated
    def delete(self):
        """Release this interface's addresses and mark it deleted.

        If a floating address is set, a DefloatNetworkInterfaceTask is queued
        for the network node. The fixed IPv4 address is then released from
        the owning network's IP manager under that network's ipmanager lock,
        and the state is set to dbo.STATE_DELETED.
        """
        if self.floating['floating_address']:
            defloat_task = DefloatNetworkInterfaceTask(
                self.network_uuid, self.uuid)
            etcd.enqueue('networknode', defloat_task)

        fixed_lock = db.get_lock(
            'ipmanager', None, self.network_uuid,
            ttl=120, op='Release fixed IP')
        with fixed_lock:
            network_ipm = IPManager.from_db(self.network_uuid)
            network_ipm.release(self.ipv4)
            network_ipm.persist()

        self.state = dbo.STATE_DELETED
Exemple #6
0
    def _process_networkinterface_workitem(self, log_ctx, workitem):
        """Handle a float / defloat work item for a network interface.

        The work item is dropped with a warning if its network or interface
        cannot be loaded. Defloat items are still processed while the network
        is dead but in STATE_DELETE_WAIT; float items are only processed when
        the network is not dead.
        """
        log_ctx = log_ctx.with_networkinterface(workitem.interface_uuid())

        network = net.Network.from_db(workitem.network_uuid())
        if not network:
            log_ctx.warning('Received work item for non-existent network')
            return

        interface = NetworkInterface.from_db(workitem.interface_uuid())
        if not interface:
            log_ctx.warning(
                'Received work item for non-existent network interface')
            return

        # A dead network is only acceptable from here on if it is waiting
        # for deletion; any other dead state ends processing now.
        delete_waiting = (
            network.state.value == net.Network.STATE_DELETE_WAIT)
        if network.is_dead() and not delete_waiting:
            log_ctx.with_fields({'state': network.state,
                                 'workitem': workitem}).info(
                'Received work item for a completely dead network')
            return

        if isinstance(workitem, DefloatNetworkInterfaceTask):
            network.remove_floating_ip(
                interface.floating.get('floating_address'), interface.ipv4)

            db.add_event(
                'interface', interface.uuid, 'api', 'defloat', None, None)
            floating_lock = db.get_lock(
                'ipmanager', None, 'floating', ttl=120,
                op='Instance defloat')
            with floating_lock:
                floating_ipm = IPManager.from_db('floating')
                floating_ipm.release(
                    interface.floating.get('floating_address'))
                floating_ipm.persist()

            interface.floating = None

        # Everything below must not run against a dead network, including
        # one that is waiting for deletion.
        if network.is_dead():
            log_ctx.with_fields({'state': network.state,
                                 'workitem': workitem}).info(
                'Received work item for a dead network')
            return

        if isinstance(workitem, FloatNetworkInterfaceTask):
            network.add_floating_ip(
                interface.floating.get('floating_address'), interface.ipv4)
Exemple #7
0
    def __init__(self, static_values):
        """Populate the network from its static values and its IP manager.

        ``static_values`` is a mapping read with .get(); missing keys yield
        None, except egress_nic and mesh_nic which fall back to the node
        configuration. Address details are read from the IPManager stored
        under this network's uuid.
        """
        lookup = static_values.get

        super(Network, self).__init__(lookup('uuid'), lookup('version'))

        self.__name = lookup('name')
        self.__namespace = lookup('namespace')
        self.__netblock = lookup('netblock')
        self.__provide_dhcp = lookup('provide_dhcp')
        self.__provide_nat = lookup('provide_nat')
        self.__vxid = lookup('vxid')

        self.egress_nic = lookup('egress_nic', config.NODE_EGRESS_NIC)
        self.mesh_nic = lookup('mesh_nic', config.NODE_MESH_NIC)

        # Index 1 of the block is used as the router address and index 2 as
        # the DHCP start. Both __ipblock and __network_address hold the
        # manager's network address.
        address_manager = IPManager.from_db(self.uuid)
        self.__ipblock = address_manager.network_address
        self.__router = address_manager.get_address_at_index(1)
        self.__dhcp_start = address_manager.get_address_at_index(2)
        self.__netmask = address_manager.netmask
        self.__broadcast = address_manager.broadcast_address
        self.__network_address = address_manager.network_address
Exemple #8
0
    def _reap_leaked_floating_ips(self):
        """Reconcile floating IP reservations with what is actually in use.

        Waits (in 60 second steps on self.exit) until the 'networknode' queue
        has nothing processing or waiting, then, holding the floating
        ipmanager lock:

        * reserves any floating gateway or interface floating address that an
          active network / interface holds but the floating IP manager
          considers free, logging an error for each;
        * releases any address the manager has in use that no active network,
          active interface, or fixed reservation accounts for, provided it
          was allocated more than 300 seconds ago.
        """
        # Block until the network node queue is idle to avoid races
        processing, waiting = etcd.get_queue_length('networknode')
        while processing + waiting > 0:
            self.exit.wait(60)
            processing, waiting = etcd.get_queue_length('networknode')

        # Ensure we haven't leaked any floating IPs (because we used to)
        with db.get_lock('ipmanager', None, 'floating', ttl=120,
                         op='Cleanup leaks'):
            floating_ipm = IPManager.from_db('floating')

            # Collect floating gateways and floating IPs, while ensuring that
            # they are correctly reserved on the floating network as well
            floating_gateways = []
            for n in net.Networks([baseobject.active_states_filter]):
                fg = n.floating_gateway
                if fg:
                    floating_gateways.append(fg)
                    if floating_ipm.is_free(fg):
                        floating_ipm.reserve(fg, n.unique_label())
                        floating_ipm.persist()
                        LOG.with_fields({
                            'network': n.uuid,
                            'address': fg
                        }).error('Floating gateway not reserved correctly')

            LOG.info('Found floating gateways: %s' % floating_gateways)

            floating_addresses = []
            for ni in networkinterface.NetworkInterfaces([baseobject.active_states_filter]):
                fa = ni.floating.get('floating_address')
                if fa:
                    floating_addresses.append(fa)
                    if floating_ipm.is_free(fa):
                        floating_ipm.reserve(fa, ni.unique_label())
                        floating_ipm.persist()
                        LOG.with_fields({
                            'networkinterface': ni.uuid,
                            'address': fa
                        }).error('Floating address not reserved correctly')
            LOG.info('Found floating addresses: %s' % floating_addresses)

            floating_reserved = [
                floating_ipm.get_address_at_index(0),
                floating_ipm.get_address_at_index(1),
                floating_ipm.broadcast_address,
                floating_ipm.network_address
            ]
            LOG.info('Found floating reservations: %s' % floating_reserved)

            # Every address with a legitimate owner, gathered once so the
            # leak check below is a constant-time lookup per reserved IP
            # instead of a fresh linear scan of all three lists.
            accounted_for = set(floating_gateways)
            accounted_for.update(floating_addresses)
            accounted_for.update(floating_reserved)

            # Now the reverse check. Test if there are any reserved IPs which
            # are not actually in use. Free any we find.
            leaks = []
            for ip in floating_ipm.in_use:
                if ip in accounted_for:
                    continue

                LOG.error('Floating IP %s has leaked.' % ip)

                # This IP needs to have been allocated more than 300 seconds
                # ago to ensure that the network setup isn't still queued.
                if time.time() - floating_ipm.in_use[ip]['when'] > 300:
                    leaks.append(ip)

            # Released in a second pass so in_use is not modified while it
            # is being iterated.
            for ip in leaks:
                LOG.error('Leaked floating IP %s has been released.' % ip)
                floating_ipm.release(ip)
            floating_ipm.persist()
Exemple #9
0
def main():
    """Start this node's daemons and supervise them until the node stops.

    Marks the node as created, logs the configuration, clears stale locks and
    restarts queues, then forks one child per entry in DAEMON_IMPLEMENTATIONS
    (the 'resources' daemon first). On the network node the floating network
    record and the egress bridge with its iptables rules are created if
    missing. The function then loops every five seconds: reaping exited
    children, restarting missing daemons while the node is not stopping, and
    once the node is stopping sending SIGTERM to the children a single time
    and returning after all of them have exited.
    """
    global DAEMON_IMPLEMENTATIONS
    global DAEMON_PIDS

    LOG.info('Starting...')
    setproctitle.setproctitle(
        daemon.process_name('main') + '-v%s' % util_general.get_version())

    # If you ran this, it means we're not shutting down any more
    n = Node.new(config.NODE_NAME, config.NODE_MESH_IP)
    n.state = Node.STATE_CREATED

    # Log configuration on startup
    for key, value in config.dict().items():
        LOG.info('Configuration item %s = %s' % (key, value))

    daemon.set_log_level(LOG, 'main')

    # Check in early and often, also reset processing queue items.
    etcd.clear_stale_locks()
    Node.observe_this_node()
    etcd.restart_queues()

    def _start_daemon(d):
        # Fork a child that runs daemon d; the parent records the child pid.
        pid = os.fork()
        if pid == 0:
            try:
                DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
                sys.exit(0)
            except Exception as e:
                util_general.ignore_exception('daemon creation', e)
                sys.exit(1)

        DAEMON_PIDS[pid] = d
        LOG.with_field('pid', pid).info('Started %s' % d)

    # Resource usage publisher, we need this early because scheduling decisions
    # might happen quite early on.
    _start_daemon('resources')

    # If I am the network node, I need some setup
    if config.NODE_IS_NETWORK_NODE:
        # Bootstrap the floating network in the Networks table
        floating_network = net.Network.from_db('floating')
        if not floating_network:
            floating_network = net.Network.create_floating_network(
                config.FLOATING_NETWORK)

        subst = {
            'egress_bridge': util_network.get_safe_interface_name(
                'egr-br-%s' % config.NODE_EGRESS_NIC),
            'egress_nic': config.NODE_EGRESS_NIC
        }

        if not util_network.check_for_interface(subst['egress_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical bridge
            # is considered outside the scope of the orchestration software as
            # it will cause the node to lose network connectivity. So instead
            # all we do is create a bridge if it doesn't exist and the wire
            # everything up to it. We can do egress NAT in that state, even if
            # floating IPs don't work.
            with util_general.RecordedOperation('create physical bridge', None):
                # No locking as read only
                ipm = IPManager.from_db('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                # We need to copy the MTU of the interface we are bridging to
                # or weird networking things happen.
                mtu = util_network.get_interface_mtu(config.NODE_EGRESS_NIC)

                util_network.create_interface(
                    subst['egress_bridge'], 'bridge', '', mtu=mtu)

                util_process.execute(None,
                                     'ip link set %(egress_bridge)s up' % subst)
                util_process.execute(None,
                                     'ip addr add %(master_float)s/%(netmask)s '
                                     'dev %(egress_bridge)s' % subst)

                util_process.execute(None,
                                     'iptables -A FORWARD -o %(egress_nic)s '
                                     '-i %(egress_bridge)s -j ACCEPT' % subst)
                util_process.execute(None,
                                     'iptables -A FORWARD -i %(egress_nic)s '
                                     '-o %(egress_bridge)s -j ACCEPT' % subst)
                util_process.execute(None,
                                     'iptables -t nat -A POSTROUTING '
                                     '-o %(egress_nic)s -j MASQUERADE' % subst)

    def _audit_daemons():
        # Start any daemon with no recorded pid, then replace any recorded
        # pid that no longer exists.
        running_daemons = list(DAEMON_PIDS.values())

        for d in DAEMON_IMPLEMENTATIONS:
            if d not in running_daemons:
                _start_daemon(d)

        for pid in list(DAEMON_PIDS):
            if not psutil.pid_exists(pid):
                # Forget the dead pid before restarting. If it stayed in
                # DAEMON_PIDS, every later audit would spawn yet another
                # copy of the daemon and the shutdown path would never see
                # DAEMON_PIDS become empty.
                name = DAEMON_PIDS.pop(pid)
                LOG.warning('%s pid is missing, restarting' % name)
                _start_daemon(name)

    _audit_daemons()
    restore_instances()

    # True until SIGTERM has been sent to the children during shutdown.
    running = True
    while True:
        time.sleep(5)

        try:
            # Reap every child that has exited without blocking.
            wpid, _ = os.waitpid(-1, os.WNOHANG)
            while wpid != 0:
                LOG.warning('%s exited (pid %d)'
                            % (DAEMON_PIDS.get(wpid, 'unknown'), wpid))
                if wpid in DAEMON_PIDS:
                    del DAEMON_PIDS[wpid]
                wpid, _ = os.waitpid(-1, os.WNOHANG)

        except ChildProcessError:
            # We get this if there are no child processes
            pass

        n = Node.from_db(config.NODE_NAME)
        if n.state.value not in [Node.STATE_STOPPING, Node.STATE_STOPPED]:
            _audit_daemons()
            Node.observe_this_node()

        elif len(DAEMON_PIDS) == 0:
            n.state = Node.STATE_STOPPED
            return

        else:
            if running:
                for pid in DAEMON_PIDS:
                    try:
                        os.kill(pid, signal.SIGTERM)
                        LOG.info('Sent SIGTERM to %s (pid %s)'
                                 % (DAEMON_PIDS.get(pid, 'unknown'), pid))
                    except OSError as e:
                        LOG.warning(
                            'Failed to send SIGTERM to %s: %s' % (pid, e))

            running = False
Exemple #10
0
    def create_on_network_node(self):
        """Create this network's routing plumbing on the network node.

        Does nothing for the 'floating' network. Otherwise, while holding the
        network lock, it runs the common creation step and then creates,
        each only if missing: the network namespace, the router veth pair
        (outer end attached to the vxlan bridge, inner end moved into the
        namespace and given the router address), and the egress veth pair
        (outer end attached to the egress bridge, inner end moved into the
        namespace). When NAT is provided it also ensures a floating gateway
        address is allocated, configures that address and the default route
        inside the namespace, and enables NAT. After the lock is released
        DHCP is updated and the state is set to STATE_CREATED.

        Raises:
            DeadNetwork: if the network is dead when the lock is acquired,
                or has become dead by the time creation finishes.
        """
        # The floating network does not have a vxlan mesh
        if self.uuid == 'floating':
            return

        with self.get_lock(op='create_on_network_node'):
            if self.is_dead():
                raise DeadNetwork('network=%s' % self)

            self._create_common()

            subst = self.subst_dict()
            if not os.path.exists('/var/run/netns/%s' % self.uuid):
                with util_general.RecordedOperation('create netns', self):
                    util_process.execute(None, 'ip netns add %s' % self.uuid)

            if not util_network.check_for_interface(subst['vx_veth_outer']):
                with util_general.RecordedOperation('create router veth', self):
                    util_network.create_interface(
                        subst['vx_veth_outer'], 'veth',
                        'peer name %(vx_veth_inner)s' % subst)
                    util_process.execute(
                        None, 'ip link set %(vx_veth_inner)s netns %(netns)s' % subst)

                    # Refer to bug 952 for more details here, but it turns out
                    # that adding an interface to a bridge overwrites the MTU of
                    # the bridge in an undesirable way. So we lookup the existing
                    # MTU and then re-specify it here.
                    subst['vx_bridge_mtu'] = util_network.get_interface_mtu(
                        subst['vx_bridge'])
                    util_process.execute(
                        None,
                        'ip link set %(vx_veth_outer)s master %(vx_bridge)s '
                        'mtu %(vx_bridge_mtu)s' % subst)

                    util_process.execute(
                        None, 'ip link set %(vx_veth_outer)s up' % subst)
                    # The inner end now lives in the namespace, so the
                    # remaining commands for it are run there.
                    util_process.execute(
                        None, 'ip link set %(vx_veth_inner)s up' % subst,
                        namespace=self.uuid)
                    util_process.execute(
                        None,
                        'ip addr add %(router)s/%(netmask)s '
                        'dev %(vx_veth_inner)s' % subst,
                        namespace=self.uuid)

            if not util_network.check_for_interface(subst['egress_veth_outer']):
                with util_general.RecordedOperation('create egress veth', self):
                    util_network.create_interface(
                        subst['egress_veth_outer'], 'veth',
                        'peer name %(egress_veth_inner)s' % subst)

                    # Refer to bug 952 for more details here, but it turns out
                    # that adding an interface to a bridge overwrites the MTU of
                    # the bridge in an undesirable way. So we lookup the existing
                    # MTU and then re-specify it here.
                    subst['egress_bridge_mtu'] = util_network.get_interface_mtu(
                        subst['egress_bridge'])
                    util_process.execute(
                        None,
                        'ip link set %(egress_veth_outer)s master %(egress_bridge)s '
                        'mtu %(egress_bridge_mtu)s' % subst)

                    util_process.execute(
                        None, 'ip link set %(egress_veth_outer)s up' % subst)
                    util_process.execute(
                        None, 'ip link set %(egress_veth_inner)s netns %(netns)s' % subst)

            if self.provide_nat:
                # We don't always need this lock, but acquiring it here means
                # we don't need to construct two identical ipmanagers one after
                # the other.
                with db.get_lock('ipmanager', None, 'floating', ttl=120,
                                 op='Network deploy NAT'):
                    ipm = IPManager.from_db('floating')
                    # Only allocate (and persist) a gateway address when the
                    # network does not already have one.
                    if not self.floating_gateway:
                        self.update_floating_gateway(
                            ipm.get_random_free_address(self.unique_label()))
                        ipm.persist()

                    subst['floating_router'] = ipm.get_address_at_index(1)
                    subst['floating_gateway'] = self.floating_gateway
                    subst['floating_netmask'] = ipm.netmask

                with util_general.RecordedOperation('enable virtual routing', self):
                    # Add the floating gateway address to the inner egress
                    # veth only if it is not already configured there.
                    addresses = util_network.get_interface_addresses(
                        subst['egress_veth_inner'], namespace=subst['netns'])
                    if not subst['floating_gateway'] in list(addresses):
                        util_process.execute(
                            None,
                            'ip addr add %(floating_gateway)s/%(floating_netmask)s '
                            'dev %(egress_veth_inner)s' % subst,
                            namespace=self.uuid)
                        util_process.execute(
                            None, 'ip link set  %(egress_veth_inner)s up' % subst,
                            namespace=self.uuid)

                    # Replace the namespace's default routes unless the only
                    # one present already points at the floating router.
                    default_routes = util_network.get_default_routes(
                        subst['netns'])
                    if default_routes != [subst['floating_router']]:
                        if default_routes:
                            for default_route in default_routes:
                                util_process.execute(
                                    None, 'route del default gw %s' % default_route,
                                    namespace=self.uuid)

                        util_process.execute(
                            None, 'route add default gw %(floating_router)s' % subst,
                            namespace=self.uuid)

                self.enable_nat()

        self.update_dhcp()

        # A final check to ensure we haven't raced with a delete
        if self.is_dead():
            raise DeadNetwork('network=%s' % self)
        self.state = self.STATE_CREATED