Example #1
def _node_calico(upd, with_testing, node_name, node_ip):
    helpers.remote_install(CALICO_CNI, with_testing)
    helpers.remote_install(CALICOCTL, with_testing)

    _create_etcd_config()
    _create_calico_config()

    run('python /var/lib/kuberdock/scripts/kubelet_args.py --network-plugin=')
    run('python /var/lib/kuberdock/scripts/kubelet_args.py '
        '--network-plugin=cni --network-plugin-dir=/etc/cni/net.d')

    # pull the image separately to get rid of calicoctl timeouts
    for i in range(3):
        run('sync')
        rv = run('docker pull kuberdock/calico-node:0.22.0-kd2')
        if not rv.failed:
            break
        upd.print_log("Pull calico-node failed. Doing retry {}".format(i))
        sleep(10)
    if rv.failed:
        raise helpers.UpgradeError(
            "Can't pull calico-node image after 3 retries: {}".format(rv))

    rv = run('ETCD_AUTHORITY="{0}:2379" '
             'ETCD_SCHEME=https '
             'ETCD_CA_CERT_FILE=/etc/pki/etcd/ca.crt '
             'ETCD_CERT_FILE=/etc/pki/etcd/etcd-client.crt '
             'ETCD_KEY_FILE=/etc/pki/etcd/etcd-client.key '
             'HOSTNAME="{1}" '
             '/opt/bin/calicoctl node '
             '--ip="{2}" '
             '--node-image=kuberdock/calico-node:0.22.0-kd2'.format(
                 MASTER_IP, node_name, node_ip))
    if rv.failed:
        raise helpers.UpgradeError("Can't start calico node: {}".format(rv))
Example #2
def _master_calico(upd, calico_network):
    rv = helpers.local(
        'ETCD_AUTHORITY=127.0.0.1:4001 /opt/bin/calicoctl pool add '
        '{} --ipip --nat-outgoing'.format(calico_network))
    if rv.failed:
        raise helpers.UpgradeError("Can't add calicoctl pool: {}".format(rv))

    for i in range(3):
        helpers.local('sync')
        rv = helpers.local('docker pull kuberdock/calico-node:0.22.0-kd2')
        if not rv.failed:
            break
        upd.print_log("Pull calico-node failed. Doing retry {}".format(i))
        sleep(10)
    if rv.failed:
        raise helpers.UpgradeError(
            "Can't pull calico-node image after 3 retries: {}".format(rv))

    helpers.local("sync && sleep 5")
    rv = helpers.local(
        'ETCD_AUTHORITY=127.0.0.1:4001 /opt/bin/calicoctl node '
        '--ip="{0}" --node-image=kuberdock/calico-node:0.22.0-kd2'.format(
            MASTER_IP))
    if rv.failed:
        raise helpers.UpgradeError("Can't start calico node: {}".format(rv))
    helpers.local("sync && sleep 5")
Example #3
def downgrade(upd, with_testing, exception, *args, **kwargs):
    _downgrade_k8s_master(upd, with_testing)
    service, res = helpers.restart_master_kubernetes()
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))

    _downgrade_etcd(upd)

    # Restart KD to make sure new libs are running
    res = helpers.restart_service(settings.KUBERDOCK_SERVICE)
    if res != 0:
        raise helpers.UpgradeError('Failed to restart KuberDock')
    helpers.downgrade_db(revision='3c832810a33c')
Example #4
def _update_00174_upgrade_node(upd, with_testing):
    upd.print_log("Upgrading kubernetes")
    helpers.remote_install(K8S_NODE, with_testing)
    service, res = helpers.restart_node_kubernetes()
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'.format(
            service, res))
Example #5
def upgrade(upd, with_testing, *args, **kwargs):
    _upgrade_k8s_master(upd, with_testing)
    service, res = helpers.restart_master_kubernetes()
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))

    _upgrade_etcd(upd)

    # Restart KD to make sure new libs are running
    res = helpers.restart_service(settings.KUBERDOCK_SERVICE)
    if res != 0:
        raise helpers.UpgradeError('Failed to restart KuberDock')

    helpers.upgrade_db()
    _update_pv_mount_paths(upd)
Example #6
def upgrade_node(upd, with_testing, env, *args, **kwargs):
    run('yum --enablerepo=kube,kube-testing clean metadata')
    # 00110_update.py
    upd.print_log('Fix node hostname in rsyslog configuration...')
    run("sed -i 's/^{0} .*/{0} {1}/' {2}".format(PARAM, env.host_string, CONF))
    run('systemctl restart rsyslog')

    # 00111_update.py
    res = helpers.remote_install('kubernetes-node-1.1.3-3.el7.cloudlinux',
                                 with_testing)
    upd.print_log(res)
    if res.failed:
        raise helpers.UpgradeError('Failed to update kubernetes on node')
    get(KUBELET_PATH, KUBELET_TEMP_PATH)
    lines = []
    with open(KUBELET_TEMP_PATH) as f:
        lines = f.readlines()
    with open(KUBELET_TEMP_PATH, 'w+') as f:
        for line in lines:
            if KUBELET_ARG in line and KUBELET_MULTIPLIERS not in line:
                s = line.split('"')
                s[1] += KUBELET_MULTIPLIERS
                line = '"'.join(s)
            f.write(line)
    put(KUBELET_TEMP_PATH, KUBELET_PATH)
    os.remove(KUBELET_TEMP_PATH)
    helpers.restart_node_kubernetes(with_enable=True)
Example #7
def upgrade(upd, with_testing, *args, **kwargs):
    upd.print_log('Generating new auth config file for nodes...')
    with open('/etc/kubernetes/kubelet_token.dat') as f:
        data = json.load(f)
    token = data['BearerToken']
    with open('/etc/kubernetes/configfile_for_nodes', 'w') as f:
        f.write(configfile.format(token, MASTER_IP))

    upd.print_log('Changing config files...')
    upd.print_log('1) controller-manager',
                  helpers.local('mv /etc/kubernetes/controller-manager.rpmnew '
                                '/etc/kubernetes/controller-manager'))
    upd.print_log('2) kube-apiserver')
    with open('/etc/kubernetes/apiserver') as f:
        data = f.read().replace('--portal_net', '--service-cluster-ip-range')
        data = data.replace('AutoProvision,LimitRanger', 'Lifecycle,NamespaceExists,LimitRanger,SecurityContextDeny,ServiceAccount')
        data = data.replace('--public_address_override', '--bind-address')
    with open('/etc/kubernetes/apiserver', 'w') as f:
        f.write(data)
    upd.print_log('Done.')

    upd.print_log('Trying to restart master kubernetes...')
    service, code = helpers.restart_master_kubernetes(with_enable=True)
    if code != 0:
        raise helpers.UpgradeError('Kubernetes not restarted. '
                                   'Service {0} code {1}'.format(service, code))
    else:
        upd.print_log('Deleting old token file',
                      helpers.local('rm -f /etc/kubernetes/kubelet_token.dat'))
    helpers.local('rm -f /etc/kubernetes/apiserver.rpmnew')
Example #8
def upgrade_node(upd, with_testing, env, *args, **kwargs):
    _upgrade_k8s_node(upd, with_testing)
    service, res = helpers.restart_node_kubernetes()
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))

    _update_node_network_plugin(upd, env)
Example #9
def upgrade_node(upd, with_testing, env, *args, **kwargs):
    run('yum --enablerepo=kube,kube-testing clean metadata')

    # 00084_update.py
    yum_base_no_kube = 'yum install --disablerepo=kube -y '

    run(yum_base_no_kube + 'kernel')
    run(yum_base_no_kube + 'kernel-tools')
    run(yum_base_no_kube + 'kernel-tools-libs')
    run(yum_base_no_kube + 'kernel-headers')
    run(yum_base_no_kube + 'kernel-devel')

    run('rpm -e -v --nodeps kernel-' + old_version)
    run('yum remove -y kernel-tools-' + old_version)
    run('yum remove -y kernel-tools-libs-' + old_version)
    run('yum remove -y kernel-headers-' + old_version)
    run('yum remove -y kernel-devel-' + old_version)

    # 00086_update.py
    res = helpers.remote_install('kubernetes-node-1.1.3', with_testing)
    upd.print_log(res)
    if res.failed:
        raise helpers.UpgradeError('Failed to update kubernetes on node')
    upd.print_log("Turn on cpu-cfs-quota in kubelet")

    get(KUBELET_PATH, KUBELET_TEMP_PATH)
    lines = []
    with open(KUBELET_TEMP_PATH) as f:
        lines = f.readlines()
    with open(KUBELET_TEMP_PATH, 'w+') as f:
        for line in lines:
            if KUBELET_ARG in line and KUBELET_CPUCFS_ENABLE not in line:
                s = line.split('"')
                s[1] += KUBELET_CPUCFS_ENABLE
                line = '"'.join(s)
            f.write(line)
    put(KUBELET_TEMP_PATH, KUBELET_PATH)
    os.remove(KUBELET_TEMP_PATH)
    helpers.restart_node_kubernetes(with_enable=True)
    upd.print_log("Restart pods to apply new limits")
    pc = PodCollection()
    pods = pc.get(as_json=False)
    for pod in pods:
        if (pod.get('host') == env.host_string and
            pod['status'] == POD_STATUSES.running):
            pc.update_container(pod['id'], None)

    # 00088_update.py
    put('/var/opt/kuberdock/node_network_plugin.sh', PLUGIN_DIR + 'kuberdock')
    put('/var/opt/kuberdock/node_network_plugin.py', PLUGIN_DIR + 'kuberdock.py')
    run('systemctl restart kuberdock-watcher')

    helpers.reboot_node(upd)
Example #10
def _upgrade_docker(upd, with_testing):
    def alter_config(line):
        if not re.match(r'OPTIONS=.*', line):
            return line

        to_remove = (r'\s*(--log-level=[^\s\']+\s*)|(-l [^\s\']+\s*)',
                     r'\s*(--log-driver=[^\s\']+)')
        for pattern in to_remove:
            line = re.sub(pattern, '', line)

        return re.sub(
            r"OPTIONS='(.*)'",
            r"OPTIONS='\1 --log-driver=json-file --log-level=error'", line)

    upd.print_log("Docker before pkg upgrade " + run("docker --version"))
    helpers.remote_install(SELINUX, with_testing)
    helpers.remote_install(DOCKER, with_testing)
    upd.print_log("Docker after pkg upgrade " + run("docker --version"))

    docker_config = StringIO()
    get('/etc/sysconfig/docker', docker_config)
    current_config = docker_config.getvalue()
    new_config = '\n'.join(
        alter_config(l) for l in current_config.splitlines())

    run("cat << EOF > /etc/sysconfig/docker\n{}\nEOF".format(new_config))

    run("mkdir -p /etc/systemd/system/docker.service.d/")
    run("cat << EOF > /etc/systemd/system/docker.service.d/timeouts.conf\n"
        "{}\nEOF".format(DOCKER_TIMEOUTS_DROPIN))

    # If we restart docker here, the rest of the node upgrade code will run
    # against the fresh docker (it is unclear whether that is good or bad),
    # and it will also restart pods/containers at this moment, which produces
    # lots of events and load on the node.
    # If we don't, docker stays old until the node reboot at the end of the
    # upgrade, so the restart part could probably be commented out (known to
    # work ~ok).
    run("systemctl daemon-reload")
    start_time = time.time()
    # Because of a bug in our package, docker could be running again at this
    # moment (possibly due to rpm %systemd hooks), so ensure it is stopped
    # again before the restart to prevent timeouts.
    upd.print_log("===== Docker.service restart timeout has been increased to "
                  "10 min, please, don't interrupt it before timeout ======")
    res = run("bash -c 'for i in $(seq 1 5); do systemctl stop docker; done; "
              "sleep 1; systemctl restart docker;'")
    upd.print_log(
        "Docker second_stop/restart took: {} secs".format(time.time() -
                                                          start_time))
    if res.failed:
        raise helpers.UpgradeError('Failed to restart docker. {}'.format(res))
    upd.print_log(run("docker --version"))
Example #11
def checkout_calico_network():
    cp = ConfigParser.ConfigParser()
    cp.read(KUBERDOCK_MAIN_CONFIG)
    try:
        v = cp.get('main', 'CALICO_NETWORK')
    except ConfigParser.Error:
        v = None
    if v:
        return v
    nets = helpers.local(
        "ip -o -4 addr | grep -vP '\slo\s' | awk '{print $4}'")
    calico_network = get_calico_network(nets)
    if not calico_network:
        raise helpers.UpgradeError("Can't find suitable network for Calico")
    cp.set('main', 'CALICO_NETWORK', calico_network)
    with open(KUBERDOCK_MAIN_CONFIG, 'wb') as configfile:
        cp.write(configfile)
    return calico_network
Example #12
        def upgrade(cls, upd):
            upd.print_log('Update influxdb...')

            # remove old version with all settings and all data
            helpers.stop_service('influxdb')
            helpers.local('rm -rf /opt/influxdb')
            helpers.local('rm /etc/systemd/system/influxdb.service')

            if os.path.isdir('/var/lib/influxdb/'):
                helpers.local('chown -R influxdb /var/lib/influxdb/')
                helpers.local('chgrp -R influxdb /var/lib/influxdb/')
            helpers.local('systemctl daemon-reload')

            # install new version
            helpers.local('systemctl reenable influxdb')
            helpers.local('systemctl restart influxdb')

            # wait for influxdb to start
            t = 1
            success = False
            ping_url = 'http://%s:%s/ping' % (
                settings.INFLUXDB_HOST, settings.INFLUXDB_PORT)
            for _ in xrange(5):
                try:
                    requests.get(ping_url)
                except requests.ConnectionError:
                    sleep(t)
                    t *= 2
                else:
                    success = True
                    break
            if not success:
                raise helpers.UpgradeError('Influxdb does not answer to ping')

            # initialization
            helpers.local(
                'influx -execute '
                '"create user {u} with password \'{p}\' with all privileges"'
                    .format(u=settings.INFLUXDB_USER,
                            p=settings.INFLUXDB_PASSWORD))
            helpers.local('influx -execute "create database {db}"'
                          .format(db=settings.INFLUXDB_DATABASE))
Example #13
def upgrade_node(upd, with_testing, env, *args, **kwargs):
    upd.print_log('Replacing kubernetes with new kubernetes-node...')
    upd.print_log(
        helpers.remote_install(
            'kubernetes kubernetes-node-0.20.2-0.4.git323fde5.el7.centos.2',
            with_testing, 'swap'))

    upd.print_log('Replacing auth config with new...')
    put('/etc/kubernetes/configfile_for_nodes', '/etc/kubernetes/configfile')
    run("""sed -i '/^KUBELET_ARGS/ {s|--auth_path=/var/lib/kubelet/kubernetes_auth|--kubeconfig=/etc/kubernetes/configfile --register-node=false|}' /etc/kubernetes/kubelet""")

    run("""sed -i '/^KUBE_MASTER/ {s|http://|https://|}' /etc/kubernetes/config""")
    run("""sed -i '/^KUBE_MASTER/ {s|7080|6443|}' /etc/kubernetes/config""")
    run("""sed -i '/^KUBE_PROXY_ARGS/ {s|""|"--kubeconfig=/etc/kubernetes/configfile"|}' /etc/kubernetes/proxy""")
    service, res = helpers.restart_node_kubernetes(with_enable=True)
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))
    else:
        upd.print_log(res)
        print run('rm -f /var/lib/kubelet/kubernetes_auth')
Example #14
def _upgrade_199(upd, with_testing, *args, **kwargs):
    ku = User.get_internal()
    pod = db.session.query(Pod).filter_by(name=KUBERDOCK_DNS_POD_NAME,
                                          owner=ku).first()
    nodes = Node.query.all()

    if not nodes:
        upd.print_log('No nodes found, exiting')
        return

    for node in nodes:
        k8s_node = node_utils._get_k8s_node_by_host(node.hostname)
        status, _ = node_utils.get_status(node, k8s_node)
        if status == NODE_STATUSES.running:
            if pod:
                pc = PodCollection()
                pc.delete(pod.id, force=True)
            create_dns_pod(node.hostname, ku)
            return

    raise helpers.UpgradeError("Can't find any running node to run dns pod")
Example #15
def _raise_on_failure(cls, service, res):
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))
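A brief usage sketch: this classmethod condenses the restart-and-check boilerplate repeated in the other examples. The enclosing class and calling method below are hypothetical; only helpers.restart_master_kubernetes() and the failure check come from the source.

class _Upgrade(object):
    # assume the _raise_on_failure classmethod above is defined on this class

    @classmethod
    def restart_master(cls):
        service, res = helpers.restart_master_kubernetes()
        # raises helpers.UpgradeError('Failed to restart ...') when res != 0
        cls._raise_on_failure(service, res)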
Example #16
def _master_network_policy(upd, calico_network):
    RULE_NEXT_TIER = {
        "id": "next-tier",
        "order": 9999,
        "inbound_rules": [{
            "action": "next-tier"
        }],
        "outbound_rules": [{
            "action": "next-tier"
        }],
        "selector": "all()"
    }
    helpers.local("etcdctl set /calico/v1/policy/tier/failsafe/metadata "
                  "'{\"order\": 0}'")
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-hosts/metadata "
        "'{\"order\": 5}'")
    helpers.local(
        'etcdctl mkdir /calico/v1/policy/tier/kuberdock-hosts/policy')
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-hosts/policy/next-tier "
        "'{}'".format(json.dumps(RULE_NEXT_TIER)))
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-nodes/metadata "
        "'{\"order\": 10}'")
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-service/metadata "
        "'{\"order\": 20}'")
    helpers.local(
        'etcdctl mkdir /calico/v1/policy/tier/kuberdock-service/policy')
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-service/policy/next-tier "
        "'{}'".format(json.dumps(RULE_NEXT_TIER)))

    KD_HOST_ROLE = 'kdnode'
    helpers.local("sync && sleep 5")
    upd.print_log('Trying to get master tunnel IP...')
    retry_pause = 3
    max_retries = 10
    MASTER_TUNNEL_IP = retry(get_calico_ip_tunnel_address, retry_pause,
                             max_retries)
    upd.print_log('Master tunnel IP is: {}'.format(MASTER_TUNNEL_IP))
    if not MASTER_TUNNEL_IP:
        raise helpers.UpgradeError("Failed to get master tunnel IP")

    KD_NODES_NEXT_TIER_FOR_PODS = {
        "id": "kd-nodes-dont-drop-pods-traffic",
        "selector": "has(kuberdock-pod-uid)",
        "order": 50,
        "inbound_rules": [{
            "action": "next-tier"
        }],
        "outbound_rules": [{
            "action": "next-tier"
        }]
    }

    KD_NODES_POLICY = {
        "id": "kd-nodes-public",
        "selector": 'role=="{}"'.format(KD_HOST_ROLE),
        "order": 100,
        "inbound_rules": [
            {
                "src_net": "{}/32".format(MASTER_IP),
                "action": "allow"
            },
            {
                "src_net": "{}/32".format(MASTER_TUNNEL_IP),
                "action": "allow"
            },
            {
                "protocol": "tcp",
                "dst_ports": [22],
                "action": "allow"
            },
        ],
        "outbound_rules": [{
            "action": "allow"
        }]
    }
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/kuberdock-nodes/policy/kuberdock-nodes '{}'".
        format(json.dumps(KD_NODES_POLICY)))
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/kuberdock-nodes/policy/pods-next-tier '{}'".
        format(json.dumps(KD_NODES_NEXT_TIER_FOR_PODS)))

    KD_MASTER_ROLE = 'kdmaster'
    master_public_tcp_ports = [22, 80, 443, 6443, 2379, 8123, 8118]
    master_public_udp_ports = [123]
    KD_MASTER_POLICY = {
        "id": "kdmaster-public",
        "selector": 'role=="{}"'.format(KD_MASTER_ROLE),
        "order": 200,
        "inbound_rules": [{
            "protocol": "tcp",
            "dst_ports": master_public_tcp_ports,
            "action": "allow"
        }, {
            "protocol": "udp",
            "dst_ports": master_public_udp_ports,
            "action": "allow"
        }, {
            "action": "next-tier"
        }],
        "outbound_rules": [{
            "action": "allow"
        }]
    }
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/kuberdock-nodes/policy/kuberdock-master '{}'".
        format(json.dumps(KD_MASTER_POLICY)))

    KD_NODES_FAILSAFE_POLICY = {
        "id": "failsafe-all",
        "selector": "all()",
        "order": 100,
        "inbound_rules": [{
            "protocol": "icmp",
            "action": "allow"
        }, {
            "dst_net": calico_network,
            "src_net": "{}/32".format(MASTER_TUNNEL_IP),
            "action": "allow"
        }, {
            "action": "next-tier"
        }],
        "outbound_rules": [{
            "protocol": "tcp",
            "dst_ports": [2379],
            "dst_net": "{}/32".format(MASTER_IP),
            "action": "allow"
        }, {
            "src_net": "{}/32".format(MASTER_TUNNEL_IP),
            "action": "allow"
        }, {
            "protocol": "udp",
            "dst_ports": [67],
            "action": "allow"
        }, {
            "action": "next-tier"
        }]
    }
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/failsafe/policy/failsafe '{}'".format(
            json.dumps(KD_NODES_FAILSAFE_POLICY)))

    MASTER_HOST_ENDPOINT = {
        "expected_ipv4_addrs": [MASTER_IP],
        "labels": {
            "role": KD_MASTER_ROLE
        },
        "profile_ids": []
    }
    MASTER_HOSTNAME = socket.gethostname()
    etcd_path = '/calico/v1/host/{0}/endpoint/{0}'.format(MASTER_HOSTNAME)
    helpers.local("etcdctl set {} '{}'".format(
        etcd_path, json.dumps(MASTER_HOST_ENDPOINT)))
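The retry(get_calico_ip_tunnel_address, retry_pause, max_retries) call above depends on a retry helper that is not shown in these examples. A minimal sketch compatible with that call signature, assuming it simply re-calls the function until it returns a truthy value:

from time import sleep

def retry(func, pause, retries, *args, **kwargs):
    # Assumed implementation: call func until it returns a truthy value
    # or the retries are exhausted; the real helper may differ.
    result = None
    for _ in range(retries):
        result = func(*args, **kwargs)
        if result:
            return result
        sleep(pause)
    return result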
Example #17
def downgrade_node(upd, with_testing, env, exception, *args, **kwargs):
    _downgrade_k8s_node(upd, with_testing)
    service, res = helpers.restart_node_kubernetes()
    if res != 0:
        raise helpers.UpgradeError('Failed to restart {0}. {1}'
                                   .format(service, res))