def _enable_ipv6(netns):
    """Enable IPv6 inside the given network namespace.

    Temporarily switches the calling process into ``netns``, writes ``0`` to
    ``/proc/sys/net/ipv6/conf/all/disable_ipv6`` there, and then switches
    back to the namespace the process was in when the function was called.

    :param netns: network namespace reference; it is passed through
                  ``utils.convert_netns`` before being used.
    """
    # Docker disables IPv6 for --net=none containers
    # TODO(apuimedo) remove when it is no longer the case
    netns = utils.convert_netns(netns)
    self_ns_path = utils.convert_netns('/proc/self/ns/net')
    # Hold a handle on our own namespace so we can return to it. It is opened
    # before the try block so the cleanup below never references an unbound
    # name, and the ``with`` closes it once we are back.
    with open(self_ns_path) as self_ns_fd:
        try:
            pyroute2.netns.setns(netns)
            path = utils.convert_netns(
                '/proc/sys/net/ipv6/conf/all/disable_ipv6')
            with open(path, 'w') as disable_ipv6:
                disable_ipv6.write('0')
        finally:
            # Always switch back, even if entering the target namespace or
            # writing the sysctl failed.
            pyroute2.netns.setns(self_ns_fd)
def connect(self, vif, ifname, netns):
    """Move a free VF of the VIF's physnet into the pod namespace.

    Picks an available VF via ``_get_available_vf_info``, optionally sets its
    VLAN, moves the VF interface into ``netns`` and there renames it to
    ``ifname``, sets its address and MTU and brings it up.

    :param vif: VIF object; ``physnet``, ``address`` and ``network`` are read.
    :param ifname: name the interface gets inside the pod namespace.
    :param netns: pod network namespace reference.
    :raises exceptions.CNIError: when no VF name is returned for the physnet.
    """
    physnet = vif.physnet
    pf_names = self._get_host_pf_names(physnet)
    vf_name, vf_index, pf = self._get_available_vf_info(pf_names)

    if not vf_name:
        error_msg = "No free interfaces for physnet {} available".format(
            physnet)
        LOG.error(error_msg)
        raise exceptions.CNIError(error_msg)

    if vif.network.should_provide_vlan:
        vlan_id = vif.network.vlan
        self._set_vf_vlan(pf, vf_index, vlan_id)

    # Use the IPDB handles as context managers so they are released on every
    # exit path instead of being left open after the call.
    with b_base.get_ipdb() as h_ipdb, b_base.get_ipdb(netns) as c_ipdb:
        with h_ipdb.interfaces[vf_name] as host_iface:
            host_iface.net_ns_fd = utils.convert_netns(netns)

        with c_ipdb.interfaces[vf_name] as iface:
            iface.ifname = ifname
            iface.address = vif.address
            iface.mtu = vif.network.mtu
            iface.up()
def connect(self, vif, ifname, netns):
    """Create the pod interface on the host and configure it in the pod.

    The interface is created in the host namespace on top of the interface
    named by ``config.CONF.binding.link_iface``, moved into ``netns``, and
    then renamed to ``ifname``, given the VIF's MTU and address, and brought
    up.

    :param vif: VIF object; ``vif_name``, ``address`` and ``network.mtu``
                are read.
    :param ifname: final interface name inside the pod namespace.
    :param netns: pod network namespace reference.
    """
    with b_base.get_ipdb() as host_ipdb:
        # NOTE(vikasc): Ideally 'ifname' should be used here, but the device
        # is created under a temporary name in the host network namespace
        # instead. CNI expects only 'eth0' as interface name, and if the
        # host already has an interface named 'eth0' the creation would fail
        # with an 'already exists' error.
        temp_name = vif.vif_name

        # TODO(vikasc): evaluate whether we should have stevedore
        # driver for getting the link device.
        parent_name = config.CONF.binding.link_iface
        create_kwargs = self._get_iface_create_args(vif)
        new_iface_txn = host_ipdb.create(
            ifname=temp_name,
            link=host_ipdb.interfaces[parent_name],
            **create_kwargs)
        with new_iface_txn as host_side:
            host_side.net_ns_fd = utils.convert_netns(netns)

    with b_base.get_ipdb(netns) as pod_ipdb:
        with pod_ipdb.interfaces[temp_name] as pod_side:
            pod_side.ifname = ifname
            pod_side.mtu = vif.network.mtu
            pod_side.address = str(vif.address)
            pod_side.up()
def get_ipdb(netns=None):
    """Return a ``pyroute2.IPDB`` for the given namespace.

    :param netns: network namespace reference, or a falsy value to get an
                  IPDB that is not bound to a ``pyroute2.NetNS``.
    :returns: a new ``pyroute2.IPDB`` instance.
    """
    if not netns:
        return pyroute2.IPDB()

    # A namespace was requested: back the IPDB with a NetNS netlink source.
    netns = utils.convert_netns(netns)
    return pyroute2.IPDB(nl=pyroute2.NetNS(netns))
def connect(self, vif, ifname, netns, container_id):
    """Move a free VF of the VIF's physnet into the pod namespace.

    Selects a VF through ``_get_available_vf_info``, applies the VLAN (when
    the network asks for one) and the MAC via the ``_set_vf_*`` helpers,
    moves the VF into ``netns``, renames it to ``ifname``, sets the MTU and
    brings it up. Finally the VF's PCI info is stored with
    ``_save_pci_info``.

    :param vif: VIF object; ``physnet``, ``id``, ``address`` and ``network``
                are read.
    :param ifname: interface name to use inside the pod namespace.
    :param netns: pod network namespace reference.
    :param container_id: only used in the debug log message.
    :raises exceptions.CNIError: when no VF name is returned for the physnet.
    """
    physnet = vif.physnet
    candidate_pfs = self._get_host_pf_names(physnet)
    vf_name, vf_index, pf, pci_info = self._get_available_vf_info(
        candidate_pfs)

    if not vf_name:
        no_vf_msg = "No free interfaces for physnet {} available".format(
            physnet)
        raise exceptions.CNIError(no_vf_msg)

    connect_msg = "Connect {} as {} (port_id={}) in container_id={}".format(
        vf_name, ifname, vif.id, container_id)
    LOG.debug(connect_msg)

    if vif.network.should_provide_vlan:
        self._set_vf_vlan(pf, vf_index, vif.network.vlan)

    self._set_vf_mac(pf, vf_index, vif.address)

    with b_base.get_ipdb() as host_ipdb:
        with b_base.get_ipdb(netns) as pod_ipdb:
            # Hand the VF over to the pod namespace first ...
            with host_ipdb.interfaces[vf_name] as vf_on_host:
                vf_on_host.net_ns_fd = utils.convert_netns(netns)

            # ... then finish its configuration from inside that namespace.
            with pod_ipdb.interfaces[vf_name] as vf_in_pod:
                vf_in_pod.ifname = ifname
                vf_in_pod.mtu = vif.network.mtu
                vf_in_pod.up()

    self._save_pci_info(vif.id, pci_info)
def connect(self, vif, ifname, netns, container_id):
    """Move the VF chosen for this VIF into the pod namespace.

    The PCI device comes from ``_choose_pci`` and its VF details from
    ``_get_vf_info``. The VLAN (when the network asks for one) and the MAC
    are applied, the VF is moved into ``netns``, renamed to ``ifname``, given
    the network MTU and brought up. Afterwards the device is annotated on
    ``vif.pod_link`` and its PCI info is saved.

    :param vif: VIF object; ``id``, ``address``, ``network`` and
                ``pod_link`` are read.
    :param ifname: interface name to use inside the pod namespace.
    :param netns: pod network namespace reference.
    :param container_id: only used in the debug log message.
    """
    pci = self._choose_pci(vif, ifname, netns)
    vf_name, vf_index, pf, pci_info = self._get_vf_info(pci)

    connect_msg = "Connect {} as {} (port_id={}) in container_id={}".format(
        vf_name, ifname, vif.id, container_id)
    LOG.debug(connect_msg)

    if vif.network.should_provide_vlan:
        self._set_vf_vlan(pf, vf_index, vif.network.vlan)

    self._set_vf_mac(pf, vf_index, vif.address)

    with b_base.get_ipdb() as host_ipdb:
        with b_base.get_ipdb(netns) as pod_ipdb:
            # Hand the VF over to the pod namespace first ...
            with host_ipdb.interfaces[vf_name] as vf_on_host:
                vf_on_host.net_ns_fd = utils.convert_netns(netns)

            # ... then finish its configuration from inside that namespace.
            with pod_ipdb.interfaces[vf_name] as vf_in_pod:
                vf_in_pod.ifname = ifname
                vf_in_pod.mtu = vif.network.mtu
                vf_in_pod.up()

    self._annotate_device(vif.pod_link, pci)
    self._save_pci_info(vif.id, pci_info)
def connect(self, vif, ifname, netns, container_id):
    """Create the pod interface on the host and configure it in the pod.

    Leftover interfaces from a previous, interrupted attempt are removed
    first. Then the interface is created in the host namespace on top of
    ``config.CONF.binding.link_iface``, moved into ``netns``, renamed to
    ``ifname``, given the VIF's MTU and address, and brought up.

    :param vif: VIF object; ``vif_name``, ``address`` and ``network.mtu``
                are read.
    :param ifname: final interface name inside the pod namespace.
    :param netns: pod network namespace reference.
    :param container_id: not used by this implementation.
    :raises pyroute2.NetlinkError: re-raised when interface creation fails;
        for ``EEXIST`` extra debugging information is logged first.
    """
    # NOTE(vikasc): Ideally 'ifname' should be used here but instead a
    # temporary name is being used while creating the device for
    # container in host network namespace. This is because cni expects
    # only 'eth0' as interface name and if host already has an
    # interface named 'eth0', device creation will fail with 'already
    # exists' error.
    temp_name = vif.vif_name

    # First let's take a peek into the pod namespace and try to remove any
    # leftover interface in case we got restarted before CNI returned to
    # kubelet.
    with b_base.get_ipdb(netns) as c_ipdb:
        self._remove_ifaces(c_ipdb, (temp_name, ifname), netns)

    # We might also have leftover interface in the host netns, let's try to
    # remove it too. This is outside of the main host's IPDB context
    # manager to make sure removal is committed before starting next
    # transaction.
    with b_base.get_ipdb() as h_ipdb:
        self._remove_ifaces(h_ipdb, (temp_name,))

    try:
        with b_base.get_ipdb() as h_ipdb:
            # TODO(vikasc): evaluate whether we should have stevedore
            # driver for getting the link device.
            vm_iface_name = config.CONF.binding.link_iface
            args = self._get_iface_create_args(vif)
            with h_ipdb.create(ifname=temp_name,
                               link=h_ipdb.interfaces[vm_iface_name],
                               **args) as iface:
                iface.net_ns_fd = utils.convert_netns(netns)
    except pyroute2.NetlinkError as e:
        if e.code == errno.EEXIST:
            # NOTE(dulek): This is related to bug 1854928. It's super-rare,
            #              so aim of this piece is to gather any info useful
            #              for determining when it happens.
            LOG.exception('Creation of pod interface failed, most likely '
                          'due to duplicated VLAN id. This will probably '
                          'cause kuryr-daemon to crashloop. Trying to '
                          'gather debugging information.')
            # The dump below is best-effort only: a failure while collecting
            # it must not replace the NetlinkError that is re-raised below.
            try:
                with b_base.get_ipdb() as h_ipdb:
                    LOG.error('List of host interfaces: %s',
                              h_ipdb.interfaces)

                with b_base.get_ipdb(netns) as c_ipdb:
                    LOG.error('List of pod namespace interfaces: %s',
                              c_ipdb.interfaces)
            except Exception:
                LOG.exception('Failed to gather debugging information.')
        raise

    with b_base.get_ipdb(netns) as c_ipdb:
        with c_ipdb.interfaces[temp_name] as iface:
            iface.ifname = ifname
            iface.mtu = vif.network.mtu
            iface.address = str(vif.address)
            iface.up()
def connect(self, vif, ifname, netns, container_id):
    """Create the pod interface on the host and configure it in the pod.

    Leftover interfaces from a previous, interrupted attempt are removed
    first. The parent interface is found with ``_detect_iface_name`` and its
    MTU must not be smaller than the pod network's MTU. The new interface is
    then created in the host namespace, moved into ``netns``, renamed to
    ``ifname``, given the VIF's MTU and address, and brought up.

    :param vif: VIF object; ``vif_name``, ``address`` and ``network`` are
                read.
    :param ifname: final interface name inside the pod namespace.
    :param netns: pod network namespace reference.
    :param container_id: not used by this implementation.
    :raises exceptions.CNIBindingFailure: when the parent interface MTU is
        smaller than the pod network MTU.
    """
    # NOTE(vikasc): Ideally 'ifname' should be used here, but the device is
    # created under a temporary name in the host network namespace instead.
    # CNI expects only 'eth0' as interface name, and if the host already has
    # an interface named 'eth0' the creation would fail with an 'already
    # exists' error.
    temp_name = vif.vif_name

    # We may have been restarted before CNI returned to kubelet, so start by
    # clearing any leftover interface out of the pod namespace.
    with b_base.get_ipdb(netns) as pod_ipdb:
        self._remove_ifaces(pod_ipdb, (temp_name, ifname), netns)

    # The host netns may hold a leftover as well. It is removed in its own
    # IPDB context, separate from the one used for creation below, so that
    # the removal is committed before the next transaction starts.
    with b_base.get_ipdb() as cleanup_ipdb:
        self._remove_ifaces(cleanup_ipdb, (temp_name, ))

    with b_base.get_ipdb() as host_ipdb:
        # TODO(vikasc): evaluate whether we should have stevedore
        # driver for getting the link device.
        vm_iface_name = self._detect_iface_name(host_ipdb)
        mtu = host_ipdb.interfaces[vm_iface_name].mtu
        if mtu < vif.network.mtu:
            # NOTE(dulek): This might happen if Neutron and DHCP agent
            # have different MTU settings. See
            # https://bugs.launchpad.net/kuryr-kubernetes/+bug/1863212
            mtu_error = (
                f'MTU of interface {vm_iface_name} ({mtu}) is smaller '
                f'than MTU of pod network {vif.network.id} '
                f'({vif.network.mtu}). Please make sure pod network '
                f'has the same or smaller MTU as node (VM) network.')
            raise exceptions.CNIBindingFailure(mtu_error)

        create_kwargs = self._get_iface_create_args(vif)
        new_iface_txn = host_ipdb.create(
            ifname=temp_name,
            link=host_ipdb.interfaces[vm_iface_name],
            **create_kwargs)
        with new_iface_txn as host_side:
            host_side.net_ns_fd = utils.convert_netns(netns)

    with b_base.get_ipdb(netns) as pod_ipdb:
        with pod_ipdb.interfaces[temp_name] as pod_side:
            pod_side.ifname = ifname
            pod_side.mtu = vif.network.mtu
            pod_side.address = str(vif.address)
            pod_side.up()
def _move_to_netns(self, ifname, netns, vif, vf_name, vf_index, pf):
    """Configure a VF and move it into the pod network namespace.

    When both a VF index and a PF are known, the VLAN (if the network asks
    for one) and the MAC are applied through the ``_set_vf_*`` helpers. The
    VF interface is then moved into ``netns``, renamed to ``ifname``, given
    the network MTU and brought up.

    :param ifname: interface name to use inside the pod namespace.
    :param netns: pod network namespace reference.
    :param vif: VIF object; ``address`` and ``network`` are read.
    :param vf_name: current name of the VF interface on the host.
    :param vf_index: index of the VF on its PF, or ``None`` when unknown.
    :param pf: PF the VF belongs to, or a falsy value when unknown.
    """
    # VF indices start at 0, so compare against None instead of relying on
    # truthiness; otherwise the first VF of a PF would silently skip the
    # VLAN/MAC setup.
    # NOTE(review): assumes a missing VF index is signalled with None --
    # confirm against the helper that produces vf_index.
    if vf_index is not None and pf:
        if vif.network.should_provide_vlan:
            vlan_id = vif.network.vlan
            self._set_vf_vlan(pf, vf_index, vlan_id)

        self._set_vf_mac(pf, vf_index, vif.address)

    with b_base.get_ipdb() as h_ipdb, b_base.get_ipdb(netns) as c_ipdb:
        with h_ipdb.interfaces[vf_name] as host_iface:
            host_iface.net_ns_fd = utils.convert_netns(netns)

        with c_ipdb.interfaces[vf_name] as iface:
            iface.ifname = ifname
            iface.mtu = vif.network.mtu
            iface.up()
def connect(self, vif, ifname, netns, container_id):
    """Create the pod interface on the host and configure it in the pod.

    Leftover interfaces from a previous, interrupted attempt are removed
    first. Then the interface is created in the host namespace on top of
    ``config.CONF.binding.link_iface``, moved into ``netns``, renamed to
    ``ifname``, given the VIF's MTU and address, and brought up.

    :param vif: VIF object; ``vif_name``, ``address`` and ``network.mtu``
                are read.
    :param ifname: final interface name inside the pod namespace.
    :param netns: pod network namespace reference.
    :param container_id: not used by this implementation.
    """
    # NOTE(vikasc): Ideally 'ifname' should be used here but instead a
    # temporary name is being used while creating the device for
    # container in host network namespace. This is because cni expects
    # only 'eth0' as interface name and if host already has an
    # interface named 'eth0', device creation will fail with 'already
    # exists' error.
    temp_name = vif.vif_name

    # First let's take a peek into the pod namespace and try to remove any
    # leftover interface in case we got restarted before CNI returned to
    # kubelet.
    with b_base.get_ipdb(netns) as c_ipdb:
        self._remove_ifaces(c_ipdb, (temp_name, ifname), netns)

    # We might also have leftover interface in the host netns, let's try to
    # remove it too. This is done in its own IPDB context, separate from the
    # one that creates the new interface, to make sure the removal is
    # committed before the next transaction starts.
    with b_base.get_ipdb() as h_ipdb:
        self._remove_ifaces(h_ipdb, (temp_name,))

    with b_base.get_ipdb() as h_ipdb:
        # TODO(vikasc): evaluate whether we should have stevedore
        # driver for getting the link device.
        vm_iface_name = config.CONF.binding.link_iface
        args = self._get_iface_create_args(vif)
        with h_ipdb.create(ifname=temp_name,
                           link=h_ipdb.interfaces[vm_iface_name],
                           **args) as iface:
            iface.net_ns_fd = utils.convert_netns(netns)

    with b_base.get_ipdb(netns) as c_ipdb:
        with c_ipdb.interfaces[temp_name] as iface:
            iface.ifname = ifname
            iface.mtu = vif.network.mtu
            iface.address = str(vif.address)
            iface.up()
def _cleanup_conflicting_vlan(self, netns, vlan_id):
    """Remove interfaces that already use ``vlan_id`` on the VM interface.

    Walks over the network namespaces that can be discovered from ``netns``
    (either every ``/proc/<pid>/ns/net`` or every entry in the directory
    that contains ``netns``) and, in each distinct namespace, removes the
    first interface whose ``vlan_id`` equals ``vlan_id`` and whose ``link``
    is the index of the interface returned by ``_detect_iface_name``.

    :param netns: network namespace path of the pod being connected; only
                  used to decide where to look for other namespaces.
    :param vlan_id: VLAN ID to look for; nothing is done when it is None.
    """
    if vlan_id is None:
        # Better to not attempt that, might remove way too much.
        return

    netns_paths = []
    # (st_dev, st_ino) pairs of namespaces that were already inspected.
    handled_netns = set()

    with b_base.get_ipdb() as h_ipdb:
        vm_iface_name = self._detect_iface_name(h_ipdb)
        vm_iface_index = h_ipdb.interfaces[vm_iface_name].index

    if netns.startswith('/proc'):
        # Paths have /proc/<pid>/ns/net pattern, we need to iterate
        # over /proc.
        netns_dir = utils.convert_netns('/proc')
        for pid in os.listdir(netns_dir):
            if not pid.isdigit():
                # Ignore all the non-pid stuff in /proc
                continue
            netns_paths.append(os.path.join(netns_dir, pid, 'ns/net'))
    else:
        # cri-o manages netns, they're in /var/run/netns/* or similar.
        netns_dir = os.path.dirname(netns)
        netns_paths = os.listdir(netns_dir)
        netns_paths = [
            os.path.join(netns_dir, netns_path)
            for netns_path in netns_paths
        ]

    for netns_path in netns_paths:
        netns_path = os.fsdecode(netns_path)
        try:
            # NOTE(dulek): inode can be used to clearly distinguish the
            #              netns' as `man namespaces` says:
            #
            # Since Linux 3.8, they appear as symbolic links. If two
            # processes are in the same namespace, then the device IDs and
            # inode numbers of their /proc/[pid]/ns/xxx symbolic links will
            # be the same; an application can check this using the
            # stat.st_dev and stat.st_ino fields returned by stat(2).
            netns_stat = os.stat(netns_path)
            netns_id = netns_stat.st_dev, netns_stat.st_ino
        except OSError:
            # The path could not be stat'ed; skip it.
            continue
        if netns_id in handled_netns:
            # Same namespace as one we already looked at.
            continue
        handled_netns.add(netns_id)

        try:
            with b_base.get_ipdb(netns_path) as c_ipdb:
                for ifname, iface in c_ipdb.interfaces.items():
                    if (iface.vlan_id == vlan_id
                            and iface.link == vm_iface_index):
                        LOG.warning(
                            f'Found offending interface {ifname} with '
                            f'VLAN ID {vlan_id} in netns {netns_path}. '
                            f'Trying to remove it.')
                        with c_ipdb.interfaces[ifname] as found_iface:
                            found_iface.remove()
                        # Stop scanning this namespace after the first
                        # removal.
                        break
        except OSError:
            # Opening or using this namespace failed; move on to the next.
            continue
def connect(self, vif, ifname, netns, container_id):
    """Create the pod interface on the host and configure it in the pod.

    Leftover interfaces from a previous, interrupted attempt are removed
    first. The parent interface is found with ``_detect_iface_name`` and its
    MTU must equal the pod network's MTU. The new interface is then created
    in the host namespace, moved into ``netns``, renamed to ``ifname``, given
    the VIF's MTU and address, and brought up.

    :param vif: VIF object; ``vif_name``, ``address`` and ``network`` are
                read.
    :param ifname: final interface name inside the pod namespace.
    :param netns: pod network namespace reference.
    :param container_id: not used by this implementation.
    :raises exceptions.CNIBindingFailure: when the parent interface MTU
        differs from the pod network MTU.
    :raises pyroute2.NetlinkError: re-raised when interface creation fails;
        for ``EEXIST`` an explanatory message is logged first.
    """
    # NOTE(vikasc): Ideally 'ifname' should be used here but instead a
    # temporary name is being used while creating the device for
    # container in host network namespace. This is because cni expects
    # only 'eth0' as interface name and if host already has an
    # interface named 'eth0', device creation will fail with 'already
    # exists' error.
    temp_name = vif.vif_name

    # First let's take a peek into the pod namespace and try to remove any
    # leftover interface in case we got restarted before CNI returned to
    # kubelet.
    with b_base.get_ipdb(netns) as c_ipdb:
        self._remove_ifaces(c_ipdb, (temp_name, ifname), netns)

    # We might also have leftover interface in the host netns, let's try to
    # remove it too. This is outside of the main host's IPDB context
    # manager to make sure removal is committed before starting next
    # transaction.
    with b_base.get_ipdb() as h_ipdb:
        self._remove_ifaces(h_ipdb, (temp_name, ))

    # Computed before the try block because the EEXIST handler below logs
    # it; binding it here guarantees the name exists whenever the handler
    # runs, no matter which statement raised the NetlinkError.
    args = self._get_iface_create_args(vif)

    try:
        with b_base.get_ipdb() as h_ipdb:
            # TODO(vikasc): evaluate whether we should have stevedore
            # driver for getting the link device.
            vm_iface_name = self._detect_iface_name(h_ipdb)
            mtu = h_ipdb.interfaces[vm_iface_name].mtu
            if mtu != vif.network.mtu:
                # NOTE(dulek): This might happen if Neutron and DHCP agent
                # have different MTU settings. See
                # https://bugs.launchpad.net/kuryr-kubernetes/+bug/1863212
                raise exceptions.CNIBindingFailure(
                    f'MTU of interface {vm_iface_name} ({mtu}) does not '
                    f'match MTU of pod network {vif.network.id} '
                    f'({vif.network.mtu}). Please make sure pod network '
                    f'has the same MTU as node (VM) network.')

            with h_ipdb.create(ifname=temp_name,
                               link=h_ipdb.interfaces[vm_iface_name],
                               **args) as iface:
                iface.net_ns_fd = utils.convert_netns(netns)
    except pyroute2.NetlinkError as e:
        if e.code == errno.EEXIST:
            # NOTE(dulek): This is related to bug 1854928. It's super-rare,
            #              so aim of this piece is to gather any info useful
            #              for determining when it happens.
            LOG.exception(f'Creation of pod interface failed due to VLAN '
                          f'ID (vlan_info={args}) conflict. Probably the '
                          f'CRI had not cleaned up the network namespace '
                          f'of deleted pods. This should not be a '
                          f'permanent issue but may cause restart of '
                          f'kuryr-cni pod.')
        raise

    with b_base.get_ipdb(netns) as c_ipdb:
        with c_ipdb.interfaces[temp_name] as iface:
            iface.ifname = ifname
            iface.mtu = vif.network.mtu
            iface.address = str(vif.address)
            iface.up()