Exemple #1
0
def _enable_ipv6(netns):
    """Write '0' to the IPv6 ``disable_ipv6`` sysctl inside ``netns``.

    The current network namespace is remembered through an open handle on
    ``/proc/self/ns/net``; the process then switches to ``netns``, writes
    the sysctl file and finally switches back to the remembered namespace.

    :param netns: network namespace reference, passed through
                  ``utils.convert_netns`` before use.
    :raises: whatever ``utils.convert_netns``, ``open`` or
             ``pyroute2.netns.setns`` raise; nothing is swallowed.
    """
    # Docker disables IPv6 for --net=none containers
    # TODO(apuimedo) remove when it is no longer the case
    netns = utils.convert_netns(netns)
    self_ns_path = utils.convert_netns('/proc/self/ns/net')
    # The handle is opened *before* the try/finally so that the restore step
    # can never reference an unbound name, and the ``with`` guarantees the
    # descriptor is closed once we have switched back.
    with open(self_ns_path) as self_ns_fd:
        try:
            pyroute2.netns.setns(netns)
            path = utils.convert_netns(
                '/proc/sys/net/ipv6/conf/all/disable_ipv6')
            with open(path, 'w') as disable_ipv6:
                disable_ipv6.write('0')
        finally:
            # Always return to the namespace we started in.
            pyroute2.netns.setns(self_ns_fd)
Exemple #2
0
    def connect(self, vif, ifname, netns):
        """Hand a free VF on the VIF's physnet to the pod as ``ifname``.

        A VF is picked through ``_get_available_vf_info``; when the network
        asks for it, its VLAN is set via ``_set_vf_vlan``. The VF is then
        handed to ``netns`` (by assigning ``net_ns_fd``), looked up there by
        the same name, renamed to ``ifname``, given the VIF's address and
        MTU and brought up.

        :param vif: VIF object; ``physnet``, ``network`` and ``address`` are
                    read from it.
        :param ifname: name the interface gets inside the pod namespace.
        :param netns: pod network namespace reference.
        :raises exceptions.CNIError: when no VF name is returned.
        """
        physnet = vif.physnet

        pf_names = self._get_host_pf_names(physnet)
        vf_name, vf_index, pf = self._get_available_vf_info(pf_names)

        if not vf_name:
            error_msg = "No free interfaces for physnet {} available".format(
                physnet)
            LOG.error(error_msg)
            raise exceptions.CNIError(error_msg)

        if vif.network.should_provide_vlan:
            vlan_id = vif.network.vlan
            self._set_vf_vlan(pf, vf_index, vlan_id)

        # IPDB objects are only created once they are needed and are used as
        # context managers so they get released on every exit path.
        with b_base.get_ipdb() as h_ipdb, b_base.get_ipdb(netns) as c_ipdb:
            with h_ipdb.interfaces[vf_name] as host_iface:
                host_iface.net_ns_fd = utils.convert_netns(netns)

            with c_ipdb.interfaces[vf_name] as iface:
                iface.ifname = ifname
                iface.address = vif.address
                iface.mtu = vif.network.mtu
                iface.up()
Exemple #3
0
    def connect(self, vif, ifname, netns):
        """Create the pod interface on the host and set it up in ``netns``.

        The device is created under a temporary name on top of the
        configured link interface, handed to ``netns`` by assigning
        ``net_ns_fd``, and then renamed/configured from inside ``netns``.

        :param vif: VIF object; ``vif_name``, ``network.mtu`` and ``address``
                    are read from it.
        :param ifname: final interface name inside the pod namespace.
        :param netns: pod network namespace reference.
        """
        with b_base.get_ipdb() as host_db:
            # NOTE(vikasc): 'ifname' would be the natural name to create the
            # device with, but CNI expects 'eth0' and the host may already
            # own an 'eth0', which would make creation fail with 'already
            # exists'. A temporary name is used on the host side instead.
            staging_name = vif.vif_name

            # TODO(vikasc): evaluate whether we should have stevedore
            #               driver for getting the link device.
            parent_name = config.CONF.binding.link_iface

            create_kwargs = self._get_iface_create_args(vif)
            new_iface_ctx = host_db.create(
                ifname=staging_name,
                link=host_db.interfaces[parent_name],
                **create_kwargs)
            with new_iface_ctx as new_iface:
                new_iface.net_ns_fd = utils.convert_netns(netns)

        with b_base.get_ipdb(netns) as pod_db:
            with pod_db.interfaces[staging_name] as pod_iface:
                pod_iface.ifname = ifname
                pod_iface.mtu = vif.network.mtu
                pod_iface.address = str(vif.address)
                pod_iface.up()
Exemple #4
0
def get_ipdb(netns=None):
    """Return a ``pyroute2.IPDB`` instance.

    :param netns: optional network namespace reference. When truthy it is
                  converted with ``utils.convert_netns`` and the IPDB is
                  built on a ``pyroute2.NetNS`` for it; otherwise a
                  default-constructed IPDB is returned.
    """
    if not netns:
        return pyroute2.IPDB()
    ns_handle = pyroute2.NetNS(utils.convert_netns(netns))
    return pyroute2.IPDB(nl=ns_handle)
Exemple #5
0
    def connect(self, vif, ifname, netns, container_id):
        """Hand a free VF on the VIF's physnet to the pod as ``ifname``.

        Steps: pick a VF, optionally set its VLAN, set its MAC, hand it to
        ``netns`` by assigning ``net_ns_fd``, rename it and set MTU from
        inside ``netns``, bring it up, and finally store its PCI info via
        ``_save_pci_info``.

        :param vif: VIF object; ``physnet``, ``network``, ``address`` and
                    ``id`` are read from it.
        :param ifname: interface name inside the pod namespace.
        :param netns: pod network namespace reference.
        :param container_id: only used in the debug log line.
        :raises exceptions.CNIError: when no VF name is returned.
        """
        physnet = vif.physnet
        vf_info = self._get_available_vf_info(
            self._get_host_pf_names(physnet))
        vf_name, vf_index, pf, pci_info = vf_info

        if not vf_name:
            no_vf_msg = "No free interfaces for physnet {} available".format(
                physnet)
            raise exceptions.CNIError(no_vf_msg)

        debug_msg = "Connect {} as {} (port_id={}) in container_id={}".format(
            vf_name, ifname, vif.id, container_id)
        LOG.debug(debug_msg)

        if vif.network.should_provide_vlan:
            self._set_vf_vlan(pf, vf_index, vif.network.vlan)

        self._set_vf_mac(pf, vf_index, vif.address)

        with b_base.get_ipdb() as host_db:
            with b_base.get_ipdb(netns) as pod_db:
                with host_db.interfaces[vf_name] as host_side:
                    host_side.net_ns_fd = utils.convert_netns(netns)

                with pod_db.interfaces[vf_name] as pod_side:
                    pod_side.ifname = ifname
                    pod_side.mtu = vif.network.mtu
                    pod_side.up()

        self._save_pci_info(vif.id, pci_info)
Exemple #6
0
    def connect(self, vif, ifname, netns, container_id):
        """Hand the VF behind the chosen PCI device to the pod as ``ifname``.

        The PCI device comes from ``_choose_pci`` and its VF details from
        ``_get_vf_info``. The VF optionally gets a VLAN, gets its MAC set,
        is handed to ``netns`` by assigning ``net_ns_fd``, and is renamed,
        given the network MTU and brought up from inside ``netns``.
        Afterwards the device is annotated on the pod link and the PCI info
        is saved.

        :param vif: VIF object; ``network``, ``address``, ``id`` and
                    ``pod_link`` are read from it.
        :param ifname: interface name inside the pod namespace.
        :param netns: pod network namespace reference.
        :param container_id: only used in the debug log line.
        """
        pci = self._choose_pci(vif, ifname, netns)
        vf_info = self._get_vf_info(pci)
        vf_name, vf_index, pf, pci_info = vf_info

        debug_msg = "Connect {} as {} (port_id={}) in container_id={}".format(
            vf_name, ifname, vif.id, container_id)
        LOG.debug(debug_msg)

        if vif.network.should_provide_vlan:
            self._set_vf_vlan(pf, vf_index, vif.network.vlan)

        self._set_vf_mac(pf, vf_index, vif.address)

        with b_base.get_ipdb() as host_db:
            with b_base.get_ipdb(netns) as pod_db:
                with host_db.interfaces[vf_name] as host_side:
                    host_side.net_ns_fd = utils.convert_netns(netns)

                with pod_db.interfaces[vf_name] as pod_side:
                    pod_side.ifname = ifname
                    pod_side.mtu = vif.network.mtu
                    pod_side.up()

        self._annotate_device(vif.pod_link, pci)

        self._save_pci_info(vif.id, pci_info)
Exemple #7
0
    def connect(self, vif, ifname, netns, container_id):
        """Create the pod interface on the host and set it up in ``netns``.

        Leftover interfaces are removed first (from ``netns`` and from the
        host), then the device is created under a temporary name on top of
        the configured link interface, handed to ``netns`` by assigning
        ``net_ns_fd`` and finally renamed/configured from inside ``netns``.

        :param vif: VIF object; ``vif_name``, ``network.mtu`` and ``address``
                    are read from it.
        :param ifname: final interface name inside the pod namespace.
        :param netns: pod network namespace reference.
        :param container_id: not used by this method.
        :raises pyroute2.NetlinkError: re-raised unchanged when interface
            creation fails; for EEXIST extra debugging info is logged first.
        """
        # NOTE(vikasc): 'ifname' would be the natural name to create the
        # device with, but CNI expects 'eth0' and the host may already own an
        # 'eth0', which would make creation fail with 'already exists'. A
        # temporary name is used on the host side instead.
        staging_name = vif.vif_name

        # Peek into the pod namespace first and drop any leftover interface,
        # in case we got restarted before CNI returned to kubelet.
        with b_base.get_ipdb(netns) as pod_db:
            self._remove_ifaces(pod_db, (staging_name, ifname), netns)

        # A leftover may also sit in the host netns. This uses its own IPDB
        # context manager, separate from the one used for creation below, so
        # that the removal is committed before the next transaction starts.
        with b_base.get_ipdb() as host_db:
            self._remove_ifaces(host_db, (staging_name,))

        try:
            with b_base.get_ipdb() as host_db:
                # TODO(vikasc): evaluate whether we should have stevedore
                #               driver for getting the link device.
                parent_name = config.CONF.binding.link_iface

                create_kwargs = self._get_iface_create_args(vif)
                new_iface_ctx = host_db.create(
                    ifname=staging_name,
                    link=host_db.interfaces[parent_name],
                    **create_kwargs)
                with new_iface_ctx as new_iface:
                    new_iface.net_ns_fd = utils.convert_netns(netns)
        except pyroute2.NetlinkError as exc:
            if exc.code != errno.EEXIST:
                raise

            # NOTE(dulek): This is related to bug 1854928. It's super-rare,
            #              so the aim of this piece is to gather any info
            #              useful for determining when it happens.
            LOG.exception('Creation of pod interface failed, most likely '
                          'due to duplicated VLAN id. This will probably '
                          'cause kuryr-daemon to crashloop. Trying to '
                          'gather debugging information.')

            with b_base.get_ipdb() as host_db:
                LOG.error('List of host interfaces: %s', host_db.interfaces)

            with b_base.get_ipdb(netns) as pod_db:
                LOG.error('List of pod namespace interfaces: %s',
                          pod_db.interfaces)
            raise

        with b_base.get_ipdb(netns) as pod_db:
            with pod_db.interfaces[staging_name] as pod_iface:
                pod_iface.ifname = ifname
                pod_iface.mtu = vif.network.mtu
                pod_iface.address = str(vif.address)
                pod_iface.up()
Exemple #8
0
    def connect(self, vif, ifname, netns, container_id):
        """Create the pod interface on the host and set it up in ``netns``.

        Leftover interfaces are removed first (from ``netns`` and from the
        host). The link interface is detected with ``_detect_iface_name``
        and its MTU is checked against the pod network MTU. The device is
        then created under a temporary name, handed to ``netns`` by
        assigning ``net_ns_fd`` and renamed/configured from inside ``netns``.

        :param vif: VIF object; ``vif_name``, ``network`` and ``address``
                    are read from it.
        :param ifname: final interface name inside the pod namespace.
        :param netns: pod network namespace reference.
        :param container_id: not used by this method.
        :raises exceptions.CNIBindingFailure: when the link interface MTU is
            smaller than the pod network MTU.
        """
        # NOTE(vikasc): 'ifname' would be the natural name to create the
        # device with, but CNI expects 'eth0' and the host may already own an
        # 'eth0', which would make creation fail with 'already exists'. A
        # temporary name is used on the host side instead.
        staging_name = vif.vif_name

        # Peek into the pod namespace first and drop any leftover interface,
        # in case we got restarted before CNI returned to kubelet.
        with b_base.get_ipdb(netns) as pod_db:
            self._remove_ifaces(pod_db, (staging_name, ifname), netns)

        # A leftover may also sit in the host netns. This uses its own IPDB
        # context manager, separate from the one used for creation below, so
        # that the removal is committed before the next transaction starts.
        with b_base.get_ipdb() as h_ipdb:
            self._remove_ifaces(h_ipdb, (staging_name, ))

        with b_base.get_ipdb() as h_ipdb:
            # TODO(vikasc): evaluate whether we should have stevedore
            #               driver for getting the link device.
            vm_iface_name = self._detect_iface_name(h_ipdb)
            mtu = h_ipdb.interfaces[vm_iface_name].mtu
            if mtu < vif.network.mtu:
                # NOTE(dulek): This might happen if Neutron and DHCP agent
                # have different MTU settings. See
                # https://bugs.launchpad.net/kuryr-kubernetes/+bug/1863212
                raise exceptions.CNIBindingFailure(
                    f'MTU of interface {vm_iface_name} ({mtu}) is smaller '
                    f'than MTU of pod network {vif.network.id} '
                    f'({vif.network.mtu}). Please make sure pod network '
                    f'has the same or smaller MTU as node (VM) network.')

            create_kwargs = self._get_iface_create_args(vif)
            new_iface_ctx = h_ipdb.create(
                ifname=staging_name,
                link=h_ipdb.interfaces[vm_iface_name],
                **create_kwargs)
            with new_iface_ctx as new_iface:
                new_iface.net_ns_fd = utils.convert_netns(netns)

        with b_base.get_ipdb(netns) as pod_db:
            with pod_db.interfaces[staging_name] as pod_iface:
                pod_iface.ifname = ifname
                pod_iface.mtu = vif.network.mtu
                pod_iface.address = str(vif.address)
                pod_iface.up()
    def _move_to_netns(self, ifname, netns, vif, vf_name, vf_index, pf):
        """Configure the VF and hand it to ``netns`` as ``ifname``.

        Optionally sets the VF VLAN, sets the VF MAC, assigns ``net_ns_fd``
        on the host-side interface, then renames it, sets the network MTU
        and brings it up from inside ``netns``.

        :param ifname: interface name inside the pod namespace.
        :param netns: pod network namespace reference.
        :param vif: VIF object; ``network`` and ``address`` are read.
        :param vf_name: current name of the VF interface on the host.
        :param vf_index: VF index passed to the VLAN/MAC helpers.
        :param pf: physical function passed to the VLAN/MAC helpers.
        """
        # NOTE(review): the VLAN step is skipped whenever vf_index or pf is
        # falsy, while the MAC step below always runs - confirm that this
        # asymmetry (and the handling of a falsy vf_index) is intended.
        if vf_index and pf and vif.network.should_provide_vlan:
            self._set_vf_vlan(pf, vf_index, vif.network.vlan)

        self._set_vf_mac(pf, vf_index, vif.address)

        with b_base.get_ipdb() as host_db:
            with b_base.get_ipdb(netns) as pod_db:
                with host_db.interfaces[vf_name] as host_side:
                    host_side.net_ns_fd = utils.convert_netns(netns)

                with pod_db.interfaces[vf_name] as pod_side:
                    pod_side.ifname = ifname
                    pod_side.mtu = vif.network.mtu
                    pod_side.up()
Exemple #10
0
    def connect(self, vif, ifname, netns, container_id):
        """Create the pod interface on the host and set it up in ``netns``.

        Leftover interfaces are removed first (from ``netns`` and, within
        the same host IPDB context used for creation, from the host). The
        device is created under a temporary name on top of the configured
        link interface, handed to ``netns`` by assigning ``net_ns_fd`` and
        renamed/configured from inside ``netns``.

        :param vif: VIF object; ``vif_name``, ``network.mtu`` and ``address``
                    are read from it.
        :param ifname: final interface name inside the pod namespace.
        :param netns: pod network namespace reference.
        :param container_id: not used by this method.
        """
        # NOTE(vikasc): 'ifname' would be the natural name to create the
        # device with, but CNI expects 'eth0' and the host may already own an
        # 'eth0', which would make creation fail with 'already exists'. A
        # temporary name is used on the host side instead.
        staging_name = vif.vif_name

        # Peek into the pod namespace first and drop any leftover interface,
        # in case we got restarted before CNI returned to kubelet.
        with b_base.get_ipdb(netns) as pod_db:
            self._remove_ifaces(pod_db, (staging_name, ifname), netns)

        with b_base.get_ipdb() as host_db:
            # TODO(vikasc): evaluate whether we should have stevedore
            #               driver for getting the link device.
            parent_name = config.CONF.binding.link_iface

            # A leftover may also sit in the host netns; try to remove it.
            self._remove_ifaces(host_db, (staging_name,))

            create_kwargs = self._get_iface_create_args(vif)
            new_iface_ctx = host_db.create(
                ifname=staging_name,
                link=host_db.interfaces[parent_name],
                **create_kwargs)
            with new_iface_ctx as new_iface:
                new_iface.net_ns_fd = utils.convert_netns(netns)

        with b_base.get_ipdb(netns) as pod_db:
            with pod_db.interfaces[staging_name] as pod_iface:
                pod_iface.ifname = ifname
                pod_iface.mtu = vif.network.mtu
                pod_iface.address = str(vif.address)
                pod_iface.up()
Exemple #11
0
    def _cleanup_conflicting_vlan(self, netns, vlan_id):
        if vlan_id is None:
            # Better to not attempt that, might remove way to much.
            return

        netns_paths = []
        handled_netns = set()
        with b_base.get_ipdb() as h_ipdb:
            vm_iface_name = self._detect_iface_name(h_ipdb)
            vm_iface_index = h_ipdb.interfaces[vm_iface_name].index

        if netns.startswith('/proc'):
            # Paths have /proc/<pid>/ns/net pattern, we need to iterate
            # over /proc.
            netns_dir = utils.convert_netns('/proc')
            for pid in os.listdir(netns_dir):
                if not pid.isdigit():
                    # Ignore all the non-pid stuff in /proc
                    continue
                netns_paths.append(os.path.join(netns_dir, pid, 'ns/net'))
        else:
            # cri-o manages netns, they're in /var/run/netns/* or similar.
            netns_dir = os.path.dirname(netns)
            netns_paths = os.listdir(netns_dir)
            netns_paths = [
                os.path.join(netns_dir, netns_path)
                for netns_path in netns_paths
            ]

        for netns_path in netns_paths:
            netns_path = os.fsdecode(netns_path)
            try:
                # NOTE(dulek): inode can be used to clearly distinguish the
                #              netns' as `man namespaces` says:
                #
                # Since Linux 3.8, they appear as symbolic links.  If two
                # processes are in the same namespace, then the device IDs and
                # inode numbers of their /proc/[pid]/ns/xxx symbolic links will
                # be the same; an application can check this using the
                # stat.st_dev and stat.st_ino fields returned by stat(2).
                netns_stat = os.stat(netns_path)
                netns_id = netns_stat.st_dev, netns_stat.st_ino
            except OSError:
                continue
            if netns_id in handled_netns:
                continue
            handled_netns.add(netns_id)

            try:
                with b_base.get_ipdb(netns_path) as c_ipdb:
                    for ifname, iface in c_ipdb.interfaces.items():
                        if (iface.vlan_id == vlan_id
                                and iface.link == vm_iface_index):
                            LOG.warning(
                                f'Found offending interface {ifname} with '
                                f'VLAN ID {vlan_id} in netns {netns_path}. '
                                f'Trying to remove it.')
                            with c_ipdb.interfaces[ifname] as found_iface:
                                found_iface.remove()
                            break
            except OSError:
                continue
    def connect(self, vif, ifname, netns, container_id):
        """Create the pod interface on the host and set it up in ``netns``.

        Leftover interfaces are removed first (from ``netns`` and from the
        host). The link interface is detected with ``_detect_iface_name``
        and its MTU must equal the pod network MTU. The device is then
        created under a temporary name, handed to ``netns`` by assigning
        ``net_ns_fd`` and renamed/configured from inside ``netns``.

        :param vif: VIF object; ``vif_name``, ``network`` and ``address``
                    are read from it.
        :param ifname: final interface name inside the pod namespace.
        :param netns: pod network namespace reference.
        :param container_id: not used by this method.
        :raises exceptions.CNIBindingFailure: when the link interface MTU
            differs from the pod network MTU.
        :raises pyroute2.NetlinkError: re-raised unchanged when interface
            creation fails; EEXIST is logged first.
        """
        # NOTE(vikasc): Ideally 'ifname' should be used here but instead a
        # temporary name is being used while creating the device for
        # container in host network namespace. This is because cni expects
        # only 'eth0' as interface name and if host already has an
        # interface named 'eth0', device creation will fail with 'already
        # exists' error.
        temp_name = vif.vif_name

        # First let's take a peek into the pod namespace and try to remove any
        # leftover interface in case we got restarted before CNI returned to
        # kubelet.
        with b_base.get_ipdb(netns) as c_ipdb:
            self._remove_ifaces(c_ipdb, (temp_name, ifname), netns)

        # We might also have leftover interface in the host netns, let's try to
        # remove it too. This is outside of the main host's IPDB context
        # manager to make sure removal is committed before starting next
        # transaction.
        with b_base.get_ipdb() as h_ipdb:
            self._remove_ifaces(h_ipdb, (temp_name, ))

        # Pre-bind so the EEXIST log line below can always be formatted, even
        # if the NetlinkError is raised before the create args are computed;
        # otherwise an unbound-name error would mask the original exception.
        args = None
        try:
            with b_base.get_ipdb() as h_ipdb:
                # TODO(vikasc): evaluate whether we should have stevedore
                #               driver for getting the link device.
                vm_iface_name = self._detect_iface_name(h_ipdb)
                mtu = h_ipdb.interfaces[vm_iface_name].mtu
                if mtu != vif.network.mtu:
                    # NOTE(dulek): This might happen if Neutron and DHCP agent
                    # have different MTU settings. See
                    # https://bugs.launchpad.net/kuryr-kubernetes/+bug/1863212
                    raise exceptions.CNIBindingFailure(
                        f'MTU of interface {vm_iface_name} ({mtu}) does not '
                        f'match MTU of pod network {vif.network.id} '
                        f'({vif.network.mtu}). Please make sure pod network '
                        f'has the same MTU as node (VM) network.')

                args = self._get_iface_create_args(vif)
                with h_ipdb.create(ifname=temp_name,
                                   link=h_ipdb.interfaces[vm_iface_name],
                                   **args) as iface:
                    iface.net_ns_fd = utils.convert_netns(netns)
        except pyroute2.NetlinkError as e:
            if e.code == errno.EEXIST:
                # NOTE(dulek): This is related to bug 1854928. It's super-rare,
                #              so aim of this piece is to gather any info
                #              useful for determining when it happens.
                LOG.exception(f'Creation of pod interface failed due to VLAN '
                              f'ID (vlan_info={args}) conflict. Probably the '
                              f'CRI had not cleaned up the network namespace '
                              f'of deleted pods. This should not be a '
                              f'permanent issue but may cause restart of '
                              f'kuryr-cni pod.')
            raise

        with b_base.get_ipdb(netns) as c_ipdb:
            with c_ipdb.interfaces[temp_name] as iface:
                iface.ifname = ifname
                iface.mtu = vif.network.mtu
                iface.address = str(vif.address)
                iface.up()