def tearDown(self):
    super(_TestBGPAdvert, self).tearDown()
    self.delete_and_confirm(self.ns, "ns")
    try:
        # Delete the extra node.
        run("docker rm -f kube-node-extra")
    except subprocess.CalledProcessError:
        pass

    # Delete BGPPeers.
    calicoctl("delete bgppeer node-extra.peer", allow_fail=True)
    calicoctl("delete bgppeer peer-with-rr", allow_fail=True)

    # Restore node-to-node mesh.
    calicoctl("""apply -f - << EOF
apiVersion: projectcalico.org/v3
kind: BGPConfiguration
metadata: {name: default}
spec:
  nodeToNodeMeshEnabled: true
  asNumber: 64512
EOF
""")

    # Remove node-2's route-reflector config.
    json_str = calicoctl("get node %s -o json" % self.nodes[2])
    node_dict = json.loads(json_str)
    node_dict['metadata']['labels'].pop('i-am-a-route-reflector', '')
    node_dict['spec']['bgp'].pop('routeReflectorClusterID', '')
    calicoctl("""apply -f - << EOF
%s
EOF
""" % json.dumps(node_dict))
def tearDown(self):
    super(TestBGPAdvert, self).tearDown()
    self.delete_and_confirm(self.ns, "ns")
    try:
        # Delete the extra node.
        run("docker rm -f kube-node-extra")
    except subprocess.CalledProcessError:
        pass
def setUpRR(self):
    super(TestBGPAdvert, self).setUp()

    # Create bgp test namespace.
    self.ns = "bgp-test"
    self.create_namespace(self.ns)

    start_external_node_with_bgp("kube-node-extra", bird_conf_rr)

    # Set CALICO_ADVERTISE_CLUSTER_IPS=10.96.0.0/12.
    self.update_ds_env("calico-node", "kube-system",
                       "CALICO_ADVERTISE_CLUSTER_IPS", "10.96.0.0/12")

    # Enable debug logging.
    self.update_ds_env("calico-node", "kube-system",
                       "BGP_LOGSEVERITYSCREEN", "debug")

    # Establish BGPPeer from cluster nodes to node-extra using calicoctl.
    # The external peer has IP 10.192.0.5.
    run("""kubectl exec -i -n kube-system calicoctl -- /calicoctl apply -f - << EOF
apiVersion: projectcalico.org/v3
kind: BGPPeer
metadata:
  name: node-extra.peer
spec:
  node: kube-node-2
  peerIP: 10.192.0.5
  asNumber: 64512
EOF
""")
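# update_ds_env, used above to set CALICO_ADVERTISE_CLUSTER_IPS and the BGP log
# level, is a method on the test base class.  A minimal sketch of what it is
# assumed to do (the helper name and the 5m timeout below are illustrative, not
# the project's actual implementation):
def _update_ds_env_sketch(ds, namespace, env_name, value):
    # Patch the env var on every container in the DaemonSet...
    run("kubectl set env ds/%s -n %s %s=%s" % (ds, namespace, env_name, value))
    # ...and wait for the new calico-node pods to roll out before continuing.
    run("kubectl rollout status ds/%s -n %s --timeout=5m" % (ds, namespace))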
def kill_bird(self):
    run("docker exec %s pkill bird" % self.restart_node)

    def check_bird_running():
        run("docker exec %s pgrep bird" % self.restart_node)

    retry_until_success(check_bird_running, retries=10, wait_time=1)
    time.sleep(5)
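# retry_until_success is imported from the shared test utilities.  Based on the
# call sites in this file it is assumed to behave roughly like this sketch:
# call the function until it stops raising, waiting wait_time seconds between
# attempts (illustrative only; the real helper may differ in details such as
# logging):
def _retry_until_success_sketch(fun, retries=10, wait_time=1, function_args=None):
    args = function_args or []
    for attempt in range(retries):
        try:
            return fun(*args)
        except Exception:
            if attempt == retries - 1:
                raise
            time.sleep(wait_time)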
def delete_calico_node_pod(self):
    run("kubectl delete po %s -n kube-system" % self.restart_pod_name)

    # Wait until a replacement calico-node pod has been created.
    retry_until_success(self.get_restart_node_pod_name, retries=10, wait_time=1)

    # Wait until it is ready, before returning.
    run("kubectl wait po %s -n kube-system --timeout=2m --for=condition=ready" %
        self.restart_pod_name)
def check_connected(name):
    try:
        run(("kubectl exec %s -n policy-demo" +
             " -- /bin/wget -O /dev/null -q --timeout=1 nginx") % name)
    except subprocess.CalledProcessError:
        _log.exception("Failed to wget from nginx service")
        return False
    _log.debug("Contacted service")
    return True
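# check_connected is typically driven through retry_until_success, so that an
# assertion only fires once routing and policy have had time to converge.  A
# hypothetical caller (assuming check_connected is in scope, and using the
# "access" client pod created in the policy-demo setUp):
def _assert_access_pod_connected_sketch():
    # Raise if the "access" pod cannot reach the nginx service yet;
    # retry_until_success will keep retrying until it can.
    if not check_connected("access"):
        raise AssertionError("access pod cannot reach nginx service")

# retry_until_success(_assert_access_pod_connected_sketch, retries=10, wait_time=1)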
def _test_restart_route_churn(self, num_repeats, restart_func, expect_churn):
    with DiagsCollector():

        # Get 2 worker node names, one to monitor routes and one
        # to have its calico-node restarted.  The first name
        # returned is always the master, so skip that.
        nodes, ips, _ = node_info()
        self.assertGreater(len(nodes), 2)
        monitor_node = nodes[1]
        self.restart_node = nodes[2]
        self.restart_node_ip = ips[2]

        # Start running ip monitor on the monitor node, to monitor
        # IPv4 route changes.  We use "fd00:10:244" to identify
        # and exclude IPv6 workload block routes like
        # fd00:10:244:0:1cc0:b1ac:ad47:e7c0/122.  These definitely
        # _do_ flap when the host of that block restarts, but it
        # is not yet clear why this is; specifically it is not yet
        # known if it indicates anything wrong with calico/node's
        # GR setup.  See
        # https://marc.info/?l=bird-users&m=158298182509702&w=2
        # for the mailing list discussion so far.
        run("docker exec -d %s sh -c 'stdbuf -oL ip -ts monitor route |"
            " stdbuf -oL grep -v fd00:10:244 > rmon.txt'" % monitor_node)

        # Find the name of the calico-node pod on the restart node.
        self.get_restart_node_pod_name()

        # Restart the calico-node several times, on the other node.
        for i in range(num_repeats):
            # Restart it.
            _log.info("Iteration %d: restart pod %s", i, self.restart_pod_name)
            restart_func(self)

        # Kill the ip monitor process.
        run("docker exec %s pkill ip" % monitor_node)

        # Dump the monitor output.
        monitor_output = run("docker exec %s cat rmon.txt" % monitor_node)

        if expect_churn:
            # Assert that it is not empty.
            self.assertNotEqual(monitor_output, "")
        else:
            # Assert that it is empty.
            self.assertEqual(monitor_output, "")
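# _test_restart_route_churn is a parameterised helper; the concrete test
# methods that drive it live elsewhere in this file.  A sketch of one such
# caller, assuming that killing bird exercises BGP graceful restart and should
# therefore cause no route churn on the monitoring node (the method name below
# is hypothetical):
def test_kill_bird_no_route_churn_sketch(self):
    # Restart bird 3 times on the restart node and assert that the ip monitor
    # on the other node records no route changes.
    self._test_restart_route_churn(3, lambda test: test.kill_bird(), False)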
def get_restart_node_pod_name(self):
    self.restart_pod_name = run("kubectl get po -n kube-system" +
                                " -l k8s-app=calico-node" +
                                " --field-selector status.podIP=" + self.restart_node_ip +
                                " -o jsonpath='{.items[*].metadata.name}'")
    if self.restart_pod_name == "":
        raise Exception('pod name not found')
def setUp(self):
    TestBase.setUp(self)
    self.create_namespace("policy-demo")
    self.deploy("nginx:1.7.9", "nginx", "policy-demo", 80)

    # Create two client pods that live for the duration of the
    # test.  We will use 'kubectl exec' to try wgets from these at
    # particular times.
    #
    # We do it this way - instead of one-shot pods that are
    # created, try wget, and then exit - because it takes a
    # relatively long time (7 seconds?) in this test setup for
    # Calico routing and policy to be set up correctly for a newly
    # created pod.  In particular it's possible that connection
    # from a just-created pod will fail because that pod's IP has
    # not yet propagated to the IP set for the ingress policy on
    # the server pod - which can confuse test code that is
    # expecting connection failure for some other reason.
    run("kubectl run --generator=run-pod/v1 access -n policy-demo" +
        " --image busybox --command /bin/sleep -- 3600")
    run("kubectl run --generator=run-pod/v1 no-access -n policy-demo" +
        " --image busybox --command /bin/sleep -- 3600")
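# The two client pods above are created with a plain "kubectl run", so before
# the first wget a test may need to wait for them to become ready.  A hedged
# sketch of such a wait (the helper name and 2m timeout are assumptions,
# mirroring the wait used for calico-node pods elsewhere in this file):
def _wait_for_client_pods_sketch():
    # Block until both client pods report Ready.
    run("kubectl wait po access no-access -n policy-demo"
        " --timeout=2m --for=condition=ready")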
def test_mainline(self):
    """
    Runs the mainline tests for service IP advertisement:
    - Create both a Local and a Cluster type NodePort service with a single replica.
      - assert only local and cluster CIDR routes are advertised.
      - assert /32 routes are used, source IP is preserved.
    - Create a local LoadBalancer service with clusterIP = None, assert no change.
    - Scale the Local NP service so it is running on multiple nodes, assert ECMP
      routing, source IP is preserved.
    - Delete both services, assert only cluster CIDR route is advertised.
    """
    with DiagsCollector():

        # Assert that a route to the service IP range is present.
        retry_until_success(
            lambda: self.assertIn("10.96.0.0/12", self.get_routes()))

        # Create both a Local and a Cluster type NodePort service with a single replica.
        local_svc = "nginx-local"
        cluster_svc = "nginx-cluster"
        self.deploy("nginx:1.7.9", local_svc, self.ns, 80)
        self.deploy("nginx:1.7.9", cluster_svc, self.ns, 80, traffic_policy="Cluster")
        self.wait_until_exists(local_svc, "svc", self.ns)
        self.wait_until_exists(cluster_svc, "svc", self.ns)

        # Get clusterIPs.
        local_svc_ip = self.get_svc_cluster_ip(local_svc, self.ns)
        cluster_svc_ip = self.get_svc_cluster_ip(cluster_svc, self.ns)

        # Wait for the deployments to roll out.
        self.wait_for_deployment(local_svc, self.ns)
        self.wait_for_deployment(cluster_svc, self.ns)

        # Assert that both nginx services can be curled from the external node.
        retry_until_success(curl, function_args=[local_svc_ip])
        retry_until_success(curl, function_args=[cluster_svc_ip])

        # Assert that the local clusterIP is an advertised route and the cluster
        # clusterIP is not.
        retry_until_success(
            lambda: self.assertIn(local_svc_ip, self.get_routes()))
        retry_until_success(
            lambda: self.assertNotIn(cluster_svc_ip, self.get_routes()))

        # Create a network policy that only accepts traffic from the external node.
        run("""docker exec -i kube-master kubectl apply -f - << EOF
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-tcp-80-ex
  namespace: bgp-test
spec:
  podSelector: {}
  policyTypes:
  - Ingress
  ingress:
  - from:
    - ipBlock: { cidr: 10.192.0.5/32 }
    ports:
    - protocol: TCP
      port: 80
EOF
""")

        # Connectivity to nginx-local should always succeed.
        for i in range(attempts):
            retry_until_success(curl, function_args=[local_svc_ip])

        # Connectivity to nginx-cluster will rarely succeed because it is
        # load-balanced across all nodes.  When the traffic hits a node that
        # doesn't host one of the service's pods, it will be re-routed to
        # another node and SNAT will cause the policy to drop the traffic.
        # Try to curl 10 times.
        try:
            for i in range(attempts):
                curl(cluster_svc_ip)
            self.fail(
                "external node should not be able to consistently access the cluster svc")
        except subprocess.CalledProcessError:
            pass

        # Scale the local_svc to 4 replicas.
        self.scale_deployment(local_svc, self.ns, 4)
        self.wait_for_deployment(local_svc, self.ns)
        self.assert_ecmp_routes(local_svc_ip)
        for i in range(attempts):
            retry_until_success(curl, function_args=[local_svc_ip])

        # Delete both services, assert only the cluster CIDR route is advertised.
        self.delete_and_confirm(local_svc, "svc", self.ns)
        self.delete_and_confirm(cluster_svc, "svc", self.ns)

        # Assert that the local clusterIP is no longer an advertised route.
        retry_until_success(
            lambda: self.assertNotIn(local_svc_ip, self.get_routes()))
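# get_routes and assert_ecmp_routes, used heavily above, are assumed to inspect
# the routing table on the external BGP peer: the advertised cluster and
# service routes are whatever BIRD on kube-node-extra has programmed into the
# kernel.  An illustrative sketch only (the real helpers live on the test
# class, and the ECMP variant additionally checks for multiple next hops):
def _get_routes_sketch():
    # Dump the kernel routing table of the external node; advertised routes
    # show up here once BGP has converged.
    return run("docker exec kube-node-extra ip route")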
def test_rr(self):
    # Reconfigure the cluster to peer via a route reflector instead of the
    # node-to-node mesh.
    self.tearDown()
    self.setUpRR()

    # Create an externalTrafficPolicy: Local service with one endpoint on node-1.
    run("""kubectl apply -f - << EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-rr
  namespace: bgp-test
  labels:
    app: nginx
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nginx
      run: nginx-rr
  template:
    metadata:
      labels:
        app: nginx
        run: nginx-rr
    spec:
      containers:
      - name: nginx-rr
        image: nginx:1.7.9
        ports:
        - containerPort: 80
      nodeSelector:
        beta.kubernetes.io/os: linux
        kubernetes.io/hostname: kube-node-1
---
apiVersion: v1
kind: Service
metadata:
  name: nginx-rr
  namespace: bgp-test
  labels:
    app: nginx
    run: nginx-rr
spec:
  ports:
  - port: 80
    targetPort: 80
  selector:
    app: nginx
    run: nginx-rr
  type: NodePort
  externalTrafficPolicy: Local
EOF
""")

    run("kubectl exec -i -n kube-system calicoctl -- /calicoctl get nodes -o yaml")
    run("kubectl exec -i -n kube-system calicoctl -- /calicoctl get bgppeers -o yaml")
    run("kubectl exec -i -n kube-system calicoctl -- /calicoctl get bgpconfigs -o yaml")

    # Update node-2 to behave as a route reflector.
    json_str = run("kubectl exec -i -n kube-system calicoctl --"
                   " /calicoctl get node kube-node-2 -o json")
    node_dict = json.loads(json_str)
    node_dict['metadata']['labels']['i-am-a-route-reflector'] = 'true'
    node_dict['spec']['bgp']['routeReflectorClusterID'] = '224.0.0.1'

    run("""kubectl exec -i -n kube-system calicoctl -- /calicoctl apply -f - << EOF
%s
EOF
""" % json.dumps(node_dict))

    # Disable node-to-node mesh and configure BGP peering between node-1 and
    # the RR, and also between the external node and the RR.
    run("""kubectl exec -i -n kube-system calicoctl -- /calicoctl apply -f - << EOF
apiVersion: projectcalico.org/v3
kind: BGPConfiguration
metadata: {name: default}
spec:
  nodeToNodeMeshEnabled: false
  asNumber: 64512
EOF
""")

    run("""kubectl exec -i -n kube-system calicoctl -- /calicoctl apply -f - << EOF
apiVersion: projectcalico.org/v3
kind: BGPPeer
metadata: {name: kube-node-1}
spec:
  node: kube-node-1
  peerIP: 10.192.0.4
  asNumber: 64512
EOF
""")

    # Assert that the nginx-rr clusterIP is advertised to the external node.
    svc_json = run("kubectl get svc nginx-rr -n bgp-test -o json")
    svc_dict = json.loads(svc_json)
    svc_cluster_ip = svc_dict['spec']['clusterIP']
    retry_until_success(lambda: self.assertIn(svc_cluster_ip, self.get_routes()))
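# test_rr above only asserts that the nginx-rr clusterIP route reaches the
# external node.  A hedged sketch of an additional end-to-end check that could
# follow it (not part of the test as written), reusing the curl helper that
# test_mainline relies on:
def _check_rr_service_reachable_sketch(cluster_ip):
    # Curl the advertised clusterIP from the external node; retried until
    # traffic flows via the route learned through the route reflector.
    retry_until_success(curl, function_args=[cluster_ip])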
def get_svc_cluster_ip(self, svc, ns):
    return run("kubectl get svc %s -n %s -o json | jq -r .spec.clusterIP" %
               (svc, ns)).strip()
def tearDown(self):
    # Delete the client pods, then the whole policy-demo namespace.
    run("kubectl delete --grace-period 0 pod access -n policy-demo")
    run("kubectl delete --grace-period 0 pod no-access -n policy-demo")
    self.delete_and_confirm("policy-demo", "ns")