def _node_calico(upd, with_testing, node_name, node_ip):
    helpers.remote_install(CALICO_CNI, with_testing)
    helpers.remote_install(CALICOCTL, with_testing)

    _create_etcd_config()
    _create_calico_config()

    run('python /var/lib/kuberdock/scripts/kubelet_args.py --network-plugin=')
    run('python /var/lib/kuberdock/scripts/kubelet_args.py '
        '--network-plugin=cni --network-plugin-dir=/etc/cni/net.d')

    # Pull the image separately to get rid of calicoctl timeouts
    for i in range(3):
        run('sync')
        rv = run('docker pull kuberdock/calico-node:0.22.0-kd2')
        if not rv.failed:
            break
        upd.print_log("Pull calico-node failed. Doing retry {}".format(i))
        sleep(10)
    if rv.failed:
        raise helpers.UpgradeError(
            "Can't pull calico-node image after 3 retries: {}".format(rv))

    rv = run('ETCD_AUTHORITY="{0}:2379" '
             'ETCD_SCHEME=https '
             'ETCD_CA_CERT_FILE=/etc/pki/etcd/ca.crt '
             'ETCD_CERT_FILE=/etc/pki/etcd/etcd-client.crt '
             'ETCD_KEY_FILE=/etc/pki/etcd/etcd-client.key '
             'HOSTNAME="{1}" '
             '/opt/bin/calicoctl node '
             '--ip="{2}" '
             '--node-image=kuberdock/calico-node:0.22.0-kd2'.format(
                 MASTER_IP, node_name, node_ip))
    if rv.failed:
        raise helpers.UpgradeError("Can't start calico node: {}".format(rv))

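# For illustration only (values are assumed, not from the original): with
# MASTER_IP='10.0.0.1', node_name='node1.example.com', node_ip='10.0.0.2',
# the calicoctl invocation at the end of _node_calico() above expands roughly to:
#
#   ETCD_AUTHORITY="10.0.0.1:2379" ETCD_SCHEME=https <cert/key env vars> \
#   HOSTNAME="node1.example.com" /opt/bin/calicoctl node \
#   --ip="10.0.0.2" --node-image=kuberdock/calico-node:0.22.0-kd2
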
def _master_calico(upd, calico_network):
    rv = helpers.local(
        'ETCD_AUTHORITY=127.0.0.1:4001 /opt/bin/calicoctl pool add '
        '{} --ipip --nat-outgoing'.format(calico_network))
    if rv.failed:
        raise helpers.UpgradeError("Can't add calicoctl pool: {}".format(rv))

    for i in range(3):
        helpers.local('sync')
        rv = helpers.local('docker pull kuberdock/calico-node:0.22.0-kd2')
        if not rv.failed:
            break
        upd.print_log("Pull calico-node failed. Doing retry {}".format(i))
        sleep(10)
    if rv.failed:
        raise helpers.UpgradeError(
            "Can't pull calico-node image after 3 retries: {}".format(rv))

    helpers.local("sync && sleep 5")
    rv = helpers.local(
        'ETCD_AUTHORITY=127.0.0.1:4001 /opt/bin/calicoctl node '
        '--ip="{0}" --node-image=kuberdock/calico-node:0.22.0-kd2'.format(
            MASTER_IP))
    if rv.failed:
        raise helpers.UpgradeError("Can't start calico node: {}".format(rv))
    helpers.local("sync && sleep 5")

def downgrade(upd, with_testing, exception, *args, **kwargs):
    _downgrade_k8s_master(upd, with_testing)
    service, res = helpers.restart_master_kubernetes()
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))
    _downgrade_etcd(upd)

    # Restart KD to make sure new libs are running
    res = helpers.restart_service(settings.KUBERDOCK_SERVICE)
    if res != 0:
        raise helpers.UpgradeError('Failed to restart KuberDock')
    helpers.downgrade_db(revision='3c832810a33c')

def _update_00174_upgrade_node(upd, with_testing):
    upd.print_log("Upgrading kubernetes")
    helpers.remote_install(K8S_NODE, with_testing)
    service, res = helpers.restart_node_kubernetes()
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'.format(
            service, res))

def upgrade(upd, with_testing, *args, **kwargs):
    _upgrade_k8s_master(upd, with_testing)
    service, res = helpers.restart_master_kubernetes()
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))
    _upgrade_etcd(upd)

    # Restart KD to make sure new libs are running
    res = helpers.restart_service(settings.KUBERDOCK_SERVICE)
    if res != 0:
        raise helpers.UpgradeError('Failed to restart KuberDock')
    helpers.upgrade_db()
    _update_pv_mount_paths(upd)

def upgrade_node(upd, with_testing, env, *args, **kwargs):
    run('yum --enablerepo=kube,kube-testing clean metadata')

    # 00110_update.py
    upd.print_log('Fix node hostname in rsyslog configuration...')
    run("sed -i 's/^{0} .*/{0} {1}/' {2}".format(PARAM, env.host_string, CONF))
    run('systemctl restart rsyslog')

    # 00111_update.py
    res = helpers.remote_install('kubernetes-node-1.1.3-3.el7.cloudlinux',
                                 with_testing)
    upd.print_log(res)
    if res.failed:
        raise helpers.UpgradeError('Failed to update kubernetes on node')

    get(KUBELET_PATH, KUBELET_TEMP_PATH)
    lines = []
    with open(KUBELET_TEMP_PATH) as f:
        lines = f.readlines()
    with open(KUBELET_TEMP_PATH, 'w+') as f:
        for line in lines:
            if KUBELET_ARG in line and KUBELET_MULTIPLIERS not in line:
                s = line.split('"')
                s[1] += KUBELET_MULTIPLIERS
                line = '"'.join(s)
            f.write(line)
    put(KUBELET_TEMP_PATH, KUBELET_PATH)
    os.remove(KUBELET_TEMP_PATH)
    helpers.restart_node_kubernetes(with_enable=True)

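# For illustration only (example value assumed): the split-on-'"' edit above
# takes a kubelet config line such as
#
#   KUBELET_ARGS="--cluster-dns=10.254.0.10 --cluster-domain=kuberdock"
#
# and rewrites it with KUBELET_MULTIPLIERS appended inside the quotes
# (the constant presumably carries its own leading space), while lines that
# already contain the multipliers are written back unchanged.
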
def upgrade(upd, with_testing, *args, **kwargs):
    upd.print_log('Generating new auth config file for nodes...')
    with open('/etc/kubernetes/kubelet_token.dat') as f:
        data = json.load(f)
    token = data['BearerToken']
    with open('/etc/kubernetes/configfile_for_nodes', 'w') as f:
        f.write(configfile.format(token, MASTER_IP))

    upd.print_log('Changing config files...')
    upd.print_log('1) controller-manager',
                  helpers.local('mv /etc/kubernetes/controller-manager.rpmnew '
                                '/etc/kubernetes/controller-manager'))
    upd.print_log('2) kube-apiserver')
    with open('/etc/kubernetes/apiserver') as f:
        data = f.read().replace('--portal_net', '--service-cluster-ip-range')
    data = data.replace(
        'AutoProvision,LimitRanger',
        'Lifecycle,NamespaceExists,LimitRanger,SecurityContextDeny,ServiceAccount')
    data = data.replace('--public_address_override', '--bind-address')
    with open('/etc/kubernetes/apiserver', 'w') as f:
        f.write(data)
    upd.print_log('Done.')

    upd.print_log('Trying to restart master kubernetes...')
    service, code = helpers.restart_master_kubernetes(with_enable=True)
    if code != 0:
        raise helpers.UpgradeError('Kubernetes not restarted. '
                                   'Service {0} code {1}'.format(service, code))
    else:
        upd.print_log('Deleting old token file',
                      helpers.local('rm -f /etc/kubernetes/kubelet_token.dat'))
    helpers.local('rm -f /etc/kubernetes/apiserver.rpmnew')

def upgrade_node(upd, with_testing, env, *args, **kwargs):
    _upgrade_k8s_node(upd, with_testing)
    service, res = helpers.restart_node_kubernetes()
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))
    _update_node_network_plugin(upd, env)

def upgrade_node(upd, with_testing, env, *args, **kwargs):
    run('yum --enablerepo=kube,kube-testing clean metadata')

    # 00084_update.py
    yum_base_no_kube = 'yum install --disablerepo=kube -y '
    run(yum_base_no_kube + 'kernel')
    run(yum_base_no_kube + 'kernel-tools')
    run(yum_base_no_kube + 'kernel-tools-libs')
    run(yum_base_no_kube + 'kernel-headers')
    run(yum_base_no_kube + 'kernel-devel')
    run('rpm -e -v --nodeps kernel-' + old_version)
    run('yum remove -y kernel-tools-' + old_version)
    run('yum remove -y kernel-tools-libs-' + old_version)
    run('yum remove -y kernel-headers-' + old_version)
    run('yum remove -y kernel-devel-' + old_version)

    # 00086_update.py
    res = helpers.remote_install('kubernetes-node-1.1.3', with_testing)
    upd.print_log(res)
    if res.failed:
        raise helpers.UpgradeError('Failed to update kubernetes on node')

    upd.print_log("Turn on cpu-cfs-quota in kubelet")
    get(KUBELET_PATH, KUBELET_TEMP_PATH)
    lines = []
    with open(KUBELET_TEMP_PATH) as f:
        lines = f.readlines()
    with open(KUBELET_TEMP_PATH, 'w+') as f:
        for line in lines:
            # Append the flag only if the line does not already contain it
            if KUBELET_ARG in line and KUBELET_CPUCFS_ENABLE not in line:
                s = line.split('"')
                s[1] += KUBELET_CPUCFS_ENABLE
                line = '"'.join(s)
            f.write(line)
    put(KUBELET_TEMP_PATH, KUBELET_PATH)
    os.remove(KUBELET_TEMP_PATH)
    helpers.restart_node_kubernetes(with_enable=True)

    upd.print_log("Restart pods to apply new limits")
    pc = PodCollection()
    pods = pc.get(as_json=False)
    for pod in pods:
        if (pod.get('host') == env.host_string and
                pod['status'] == POD_STATUSES.running):
            pc.update_container(pod['id'], None)

    # 00088_update.py
    put('/var/opt/kuberdock/node_network_plugin.sh', PLUGIN_DIR + 'kuberdock')
    put('/var/opt/kuberdock/node_network_plugin.py', PLUGIN_DIR + 'kuberdock.py')
    run('systemctl restart kuberdock-watcher')

    helpers.reboot_node(upd)

def _upgrade_docker(upd, with_testing):
    def alter_config(line):
        if not re.match(r'OPTIONS=.*', line):
            return line

        to_remove = (r'\s*(--log-level=[^\s\']+\s*)|(-l [^\s\']+\s*)',
                     r'\s*(--log-driver=[^\s\']+)')
        for pattern in to_remove:
            line = re.sub(pattern, '', line)
        return re.sub(r"OPTIONS='(.*)'",
                      r"OPTIONS='\1 --log-driver=json-file --log-level=error'",
                      line)

    upd.print_log("Docker before pkg upgrade " + run("docker --version"))
    helpers.remote_install(SELINUX, with_testing)
    helpers.remote_install(DOCKER, with_testing)
    upd.print_log("Docker after pkg upgrade " + run("docker --version"))

    docker_config = StringIO()
    get('/etc/sysconfig/docker', docker_config)
    current_config = docker_config.getvalue()
    new_config = '\n'.join(alter_config(l) for l in current_config.splitlines())

    run("cat << EOF > /etc/sysconfig/docker\n{}\nEOF".format(new_config))
    run("mkdir -p /etc/systemd/system/docker.service.d/")
    run("cat << EOF > /etc/systemd/system/docker.service.d/timeouts.conf\n"
        "{}\nEOF".format(DOCKER_TIMEOUTS_DROPIN))

    # If we restart docker here, the rest of the node upgrade code runs with
    # the fresh docker (it's not clear whether this is good or bad), and the
    # restart also restarts pods/containers at this moment, which produces
    # lots of events and load on the node.
    # If we don't, docker stays old until the node reboot at the end of the
    # upgrade. So we could probably comment out the restart part (known to
    # work ~ok).
    run("systemctl daemon-reload")

    start_time = time.time()
    # Because of a bug in our package, docker could be running again at this
    # moment (possibly due to the rpm %systemd hooks), so ensure it is stopped
    # again before the restart to prevent timeouts.
    upd.print_log("===== Docker.service restart timeout has been increased to "
                  "10 min, please, don't interrupt it before timeout ======")
    res = run("bash -c 'for i in $(seq 1 5); do systemctl stop docker; done; "
              "sleep 1; systemctl restart docker;'")
    upd.print_log("Docker second_stop/restart took: {} secs".format(
        time.time() - start_time))
    if res.failed:
        raise helpers.UpgradeError('Failed to restart docker. {}'.format(res))
    upd.print_log(run("docker --version"))

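# For illustration only (example value assumed): given the regexes above,
# alter_config() in _upgrade_docker() rewrites an OPTIONS line roughly like
# this, dropping any existing --log-level/-l/--log-driver flags and appending
# the new logging settings:
#
#   OPTIONS='--selinux-enabled --log-level=debug'
#     -> OPTIONS='--selinux-enabled --log-driver=json-file --log-level=error'
#
# Non-OPTIONS lines (e.g. DOCKER_CERT_PATH=...) are returned unchanged.
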
def checkout_calico_network():
    cp = ConfigParser.ConfigParser()
    cp.read(KUBERDOCK_MAIN_CONFIG)
    try:
        v = cp.get('main', 'CALICO_NETWORK')
    except ConfigParser.Error:
        v = None
    if v:
        return v

    nets = helpers.local(
        "ip -o -4 addr | grep -vP '\slo\s' | awk '{print $4}'")
    calico_network = get_calico_network(nets)
    if not calico_network:
        raise helpers.UpgradeError("Can't find suitable network for Calico")

    cp.set('main', 'CALICO_NETWORK', calico_network)
    with open(KUBERDOCK_MAIN_CONFIG, 'wb') as configfile:
        cp.write(configfile)
    return calico_network

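# Sketch of the data flow in checkout_calico_network() above (illustration,
# example addresses assumed): `nets` is the raw multi-line output of the
# `ip -o -4 addr` pipeline, e.g.
#
#   10.0.2.15/24
#   192.168.113.1/24
#
# and get_calico_network() (defined elsewhere in KuberDock) is expected to
# return a CIDR string that does not clash with those addresses; that string
# is then persisted as CALICO_NETWORK and later passed to `calicoctl pool add`.
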
def upgrade(cls, upd):
    upd.print_log('Update influxdb...')

    # Remove the old version with all settings and all data
    helpers.stop_service('influxdb')
    helpers.local('rm -rf /opt/influxdb')
    helpers.local('rm /etc/systemd/system/influxdb.service')

    if os.path.isdir('/var/lib/influxdb/'):
        helpers.local('chown -R influxdb /var/lib/influxdb/')
        helpers.local('chgrp -R influxdb /var/lib/influxdb/')
    helpers.local('systemctl daemon-reload')

    # Install the new version
    helpers.local('systemctl reenable influxdb')
    helpers.local('systemctl restart influxdb')

    # Wait for the service to start, with exponential backoff
    t = 1
    success = False
    ping_url = 'http://%s:%s/ping' % (settings.INFLUXDB_HOST,
                                      settings.INFLUXDB_PORT)
    for _ in xrange(5):
        try:
            requests.get(ping_url)
        except requests.ConnectionError:
            sleep(t)
            t *= 2
        else:
            success = True
            break
    if not success:
        raise helpers.UpgradeError('Influxdb does not answer to ping')

    # Initialization
    helpers.local(
        'influx -execute '
        '"create user {u} with password \'{p}\' with all privileges"'
        .format(u=settings.INFLUXDB_USER, p=settings.INFLUXDB_PASSWORD))
    helpers.local('influx -execute "create database {db}"'
                  .format(db=settings.INFLUXDB_DATABASE))

def upgrade_node(upd, with_testing, env, *args, **kwargs):
    upd.print_log('Replacing kubernetes with new kubernetes-node...')
    upd.print_log(
        helpers.remote_install(
            'kubernetes kubernetes-node-0.20.2-0.4.git323fde5.el7.centos.2',
            with_testing, 'swap'))

    upd.print_log('Replacing auth config with new...')
    put('/etc/kubernetes/configfile_for_nodes', '/etc/kubernetes/configfile')
    run("""sed -i '/^KUBELET_ARGS/ {s|--auth_path=/var/lib/kubelet/kubernetes_auth|--kubeconfig=/etc/kubernetes/configfile --register-node=false|}' /etc/kubernetes/kubelet""")
    run("""sed -i '/^KUBE_MASTER/ {s|http://|https://|}' /etc/kubernetes/config""")
    run("""sed -i '/^KUBE_MASTER/ {s|7080|6443|}' /etc/kubernetes/config""")
    run("""sed -i '/^KUBE_PROXY_ARGS/ {s|""|"--kubeconfig=/etc/kubernetes/configfile"|}' /etc/kubernetes/proxy""")

    service, res = helpers.restart_node_kubernetes(with_enable=True)
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))
    else:
        upd.print_log(res)
    print run('rm -f /var/lib/kubelet/kubernetes_auth')

def _upgrade_199(upd, with_testing, *args, **kwargs):
    ku = User.get_internal()
    pod = db.session.query(Pod).filter_by(name=KUBERDOCK_DNS_POD_NAME,
                                          owner=ku).first()
    nodes = Node.query.all()
    if not nodes:
        upd.print_log('No nodes found, exiting')
        return

    for node in nodes:
        k8s_node = node_utils._get_k8s_node_by_host(node.hostname)
        status, _ = node_utils.get_status(node, k8s_node)
        if status == NODE_STATUSES.running:
            if pod:
                pc = PodCollection()
                pc.delete(pod.id, force=True)
            create_dns_pod(node.hostname, ku)
            return

    raise helpers.UpgradeError("Can't find any running node to run dns pod")

def _raise_on_failure(cls, service, res):
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))

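# Hypothetical usage (not in the original): the helper takes the
# (service, exit_code) pair returned by the restart helpers, e.g.
#
#   cls._raise_on_failure(*helpers.restart_master_kubernetes())
#   cls._raise_on_failure(*helpers.restart_node_kubernetes())
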
def _master_network_policy(upd, calico_network):
    RULE_NEXT_TIER = {
        "id": "next-tier",
        "order": 9999,
        "inbound_rules": [{"action": "next-tier"}],
        "outbound_rules": [{"action": "next-tier"}],
        "selector": "all()"
    }
    helpers.local("etcdctl set /calico/v1/policy/tier/failsafe/metadata "
                  "'{\"order\": 0}'")
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-hosts/metadata "
        "'{\"order\": 5}'")
    helpers.local(
        'etcdctl mkdir /calico/v1/policy/tier/kuberdock-hosts/policy')
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-hosts/policy/next-tier "
        "'{}'".format(json.dumps(RULE_NEXT_TIER)))
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-nodes/metadata "
        "'{\"order\": 10}'")
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-service/metadata "
        "'{\"order\": 20}'")
    helpers.local(
        'etcdctl mkdir /calico/v1/policy/tier/kuberdock-service/policy')
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-service/policy/next-tier "
        "'{}'".format(json.dumps(RULE_NEXT_TIER)))

    KD_HOST_ROLE = 'kdnode'
    helpers.local("sync && sleep 5")

    upd.print_log('Trying to get master tunnel IP...')
    retry_pause = 3
    max_retries = 10
    MASTER_TUNNEL_IP = retry(get_calico_ip_tunnel_address, retry_pause,
                             max_retries)
    upd.print_log('Master tunnel IP is: {}'.format(MASTER_TUNNEL_IP))
    if not MASTER_TUNNEL_IP:
        raise helpers.UpgradeError("Failed to get master tunnel IP")

    KD_NODES_NEXT_TIER_FOR_PODS = {
        "id": "kd-nodes-dont-drop-pods-traffic",
        "selector": "has(kuberdock-pod-uid)",
        "order": 50,
        "inbound_rules": [{"action": "next-tier"}],
        "outbound_rules": [{"action": "next-tier"}]
    }
    KD_NODES_POLICY = {
        "id": "kd-nodes-public",
        "selector": 'role=="{}"'.format(KD_HOST_ROLE),
        "order": 100,
        "inbound_rules": [
            {"src_net": "{}/32".format(MASTER_IP), "action": "allow"},
            {"src_net": "{}/32".format(MASTER_TUNNEL_IP), "action": "allow"},
            {"protocol": "tcp", "dst_ports": [22], "action": "allow"},
        ],
        "outbound_rules": [{"action": "allow"}]
    }
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/kuberdock-nodes/policy/kuberdock-nodes '{}'".
        format(json.dumps(KD_NODES_POLICY)))
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/kuberdock-nodes/policy/pods-next-tier '{}'".
        format(json.dumps(KD_NODES_NEXT_TIER_FOR_PODS)))

    KD_MASTER_ROLE = 'kdmaster'
    master_public_tcp_ports = [22, 80, 443, 6443, 2379, 8123, 8118]
    master_public_udp_ports = [123]
    KD_MASTER_POLICY = {
        "id": "kdmaster-public",
        "selector": 'role=="{}"'.format(KD_MASTER_ROLE),
        "order": 200,
        "inbound_rules": [
            {"protocol": "tcp",
             "dst_ports": master_public_tcp_ports,
             "action": "allow"},
            {"protocol": "udp",
             "dst_ports": master_public_udp_ports,
             "action": "allow"},
            {"action": "next-tier"}
        ],
        "outbound_rules": [{"action": "allow"}]
    }
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/kuberdock-nodes/policy/kuberdock-master '{}'".
        format(json.dumps(KD_MASTER_POLICY)))

    KD_NODES_FAILSAFE_POLICY = {
        "id": "failsafe-all",
        "selector": "all()",
        "order": 100,
        "inbound_rules": [
            {"protocol": "icmp", "action": "allow"},
            {"dst_net": calico_network,
             "src_net": "{}/32".format(MASTER_TUNNEL_IP),
             "action": "allow"},
            {"action": "next-tier"}
        ],
        "outbound_rules": [
            {"protocol": "tcp",
             "dst_ports": [2379],
             "dst_net": "{}/32".format(MASTER_IP),
             "action": "allow"},
            {"src_net": "{}/32".format(MASTER_TUNNEL_IP), "action": "allow"},
            {"protocol": "udp", "dst_ports": [67], "action": "allow"},
            {"action": "next-tier"}
        ]
    }
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/failsafe/policy/failsafe '{}'".format(
            json.dumps(KD_NODES_FAILSAFE_POLICY)))

    MASTER_HOST_ENDPOINT = {
        "expected_ipv4_addrs": [MASTER_IP],
        "labels": {"role": KD_MASTER_ROLE},
        "profile_ids": []
    }
    MASTER_HOSTNAME = socket.gethostname()
    etcd_path = '/calico/v1/host/{0}/endpoint/{0}'.format(MASTER_HOSTNAME)
    helpers.local("etcdctl set {} '{}'".format(
        etcd_path, json.dumps(MASTER_HOST_ENDPOINT)))

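# `retry` and `get_calico_ip_tunnel_address` used in _master_network_policy()
# above come from elsewhere in KuberDock. A minimal sketch of a compatible
# `retry` helper (an assumption, not the actual implementation) would be:
#
#   def retry(func, pause, retries, *args, **kwargs):
#       """Call func until it returns a truthy value or retries run out."""
#       for _ in range(retries):
#           result = func(*args, **kwargs)
#           if result:
#               return result
#           sleep(pause)
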
def downgrade_node(upd, with_testing, env, exception, *args, **kwargs):
    _downgrade_k8s_node(upd, with_testing)
    service, res = helpers.restart_node_kubernetes()
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))