def run_on_kubernetes(args):
    """Deploy this service to the current Kubernetes cluster.

    Picks small resource requests in test mode, ensures supporting services,
    the rethinkdb secret and per-node persistent disks exist, then renders
    the YAML template once per node number and applies each rendered file.

    Args:
        args: argparse namespace with .test, .number, .size, .type,
              .health_delay and the flags util.get_tag/pull_policy expect.
    """
    if args.test:
        cpu_request = '10m'
        memory_request = '200Mi'
    else:
        cpu_request = '500m'
        memory_request = '2Gi'
    context = util.get_cluster_prefix()
    namespace = util.get_current_namespace()
    if len(args.number) == 0:
        # Figure out the nodes based on the names of persistent disks, or just node 0 if none.
        args.number = range(max(1, len(get_persistent_disks(context, namespace))))
    ensure_services_exist()
    util.ensure_secret_exists('rethinkdb-password', 'rethinkdb')
    args.local = False  # so tag is for gcloud
    tag = util.get_tag(args, NAME, build)
    # Read the template via a context manager so the file handle is closed
    # promptly (the previous code leaked the handle until garbage collection).
    with open(join('conf', '{name}.template.yaml'.format(name=NAME))) as template_file:
        t = template_file.read()
    for number in args.number:
        ensure_persistent_disk_exists(context, namespace, number, args.size, args.type)
        with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
            tmp.write(t.format(image=tag,
                               number=number,
                               pd_name=pd_name(context=context, namespace=namespace, number=number),
                               health_delay=args.health_delay,
                               cpu_request=cpu_request,
                               memory_request=memory_request,
                               pull_policy=util.pull_policy(args)))
            tmp.flush()
            util.update_deployment(tmp.name)
def run_on_kubernetes(args):
    """Deploy the storage service to the current Kubernetes cluster.

    Ensures the gcloud secret, service and ssh prerequisites exist, then
    renders and applies one deployment per node number, creating the
    corresponding persistent disk for each node first.

    Args:
        args: argparse namespace with .number, .size, .type, .health_delay,
              .tag and the flags util.get_tag/pull_policy expect.
    """
    create_gcloud_secret()
    context = util.get_cluster_prefix()
    namespace = util.get_current_namespace()
    if len(args.number) == 0:
        # Figure out the nodes based on the names of persistent disks, or just node 0 if none.
        args.number = range(max(1, len(get_persistent_disks(context, namespace))))
    if 'storage-projects' not in util.get_services():
        util.run(['kubectl', 'create', '-f', 'conf/service.yaml'])
    args.local = False  # so tag is for gcloud
    tag = util.get_tag(args, NAME, build)
    if not args.tag:
        tag = tag[:tag.rfind('-')]  # get rid of the final -[service] part of the tag.
    # Context manager closes the template handle promptly (previously leaked).
    with open(join('conf', '{name}.template.yaml'.format(name=NAME))) as template_file:
        t = template_file.read()
    ensure_ssh()
    for number in args.number:
        # NOTE: the original computed an unused deployment_name here; removed.
        ensure_persistent_disk_exists(context, namespace, number, args.size, args.type)
        with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
            tmp.write(t.format(image=tag,
                               number=number,
                               gcloud_bucket=gcloud_bucket(namespace=namespace),
                               pd_name=pd_name(context=context, namespace=namespace, number=number),
                               health_delay=args.health_delay,
                               pull_policy=util.pull_policy(args)))
            tmp.flush()
            util.update_deployment(tmp.name)
def resize_cluster(args):
    """Resize a managed minion instance group to args.size nodes.

    When args.name is set, the named sub-group is resized; otherwise the
    cluster's default minion group is used.
    """
    prefix = util.get_cluster_prefix()
    # Sub-groups embed their name between the cluster prefix and the suffix.
    group = ('{prefix}-{name}-minion-group'.format(prefix=prefix, name=args.name)
             if args.name
             else '{prefix}-minion-group'.format(prefix=prefix))
    cmd = ['gcloud', 'compute', 'instance-groups', 'managed', 'resize',
           group, '--size', str(args.size)]
    util.run(cmd)
def cluster_env(args, prefix=None):
    """Build the environment dict used by the kubernetes GCE cluster scripts.

    See https://github.com/kubernetes/kubernetes/blob/master/cluster/gce/config-default.sh
    for the meaning of the variables. Values already present in os.environ
    take precedence over the computed defaults.
    """
    if prefix is None:
        prefix = util.get_cluster_prefix()
    cluster_ip_range = available_cluster_ip_range()
    autoscale = 'true' if args.min_nodes < args.max_nodes else 'false'
    env = {
        'KUBERNETES_PROVIDER': 'gce',
        'KUBE_MASTER': prefix + '-master',
        'KUBE_ENABLE_CLUSTER_MONITORING': 'google',
        'KUBE_GCE_ZONE': args.zone,
        'NODE_SIZE': args.node_size,
        'NUM_NODES': str(args.min_nodes),
        'NODE_DISK_TYPE': 'pd-ssd' if args.node_ssd else 'pd-standard',
        'NODE_DISK_SIZE': "%sGB" % args.node_disk_size,
        'PREEMPTIBLE_NODE': 'true' if args.preemptible else 'false',
        'KUBE_GCE_INSTANCE_PREFIX': prefix,
        'KUBE_ENABLE_NODE_AUTOSCALER': autoscale,
        'KUBE_AUTOSCALER_MIN_NODES': str(args.min_nodes),
        'KUBE_AUTOSCALER_MAX_NODES': str(args.max_nodes),
        'CLUSTER_IP_RANGE': cluster_ip_range,
        # gcloud compute images list --project google-containers
        'KUBE_GCE_MASTER_PROJECT': 'google-containers',
        'KUBE_OS_DISTRIBUTION': 'debian',
        'KUBE_GCE_MASTER_IMAGE': 'container-v1-3-v20160604',
        'KUBE_GCE_NODE_IMAGE': 'container-v1-3-v20160604',
        'KUBE_ROOT': KUBE_ROOT,
        # NOTE: an ubuntu-trusty image was tried here previously and did not
        # work (no DNS), so the debian container images above are used.
    }
    # Master sizing is only relevant for some callers; set it when available.
    if hasattr(args, 'master_size'):
        env['MASTER_SIZE'] = args.master_size
    if hasattr(args, 'master_disk_size'):
        env['MASTER_DISK_SIZE'] = "%sGB" % args.master_disk_size
    env.update(os.environ)
    return env
def run_on_kubernetes(args):
    """Render daemon.yaml with the current image tag and apply it as a daemonset.

    Args:
        args: argparse namespace with the flags util.get_tag/pull_policy expect.
    """
    namespace = util.get_current_namespace()
    tag = util.get_tag(args, NAME, build)
    # Context manager closes the file handle promptly (previously leaked);
    # the unused `context` local was removed and `namespace` is now reused
    # instead of calling util.get_current_namespace() twice.
    with open('daemon.yaml') as template_file:
        t = template_file.read()
    with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
        tmp.write(t.format(image=tag,
                           namespace=namespace,
                           pull_policy=util.pull_policy(args)))
        tmp.flush()
        util.update_daemonset(tmp.name)
def run_on_kubernetes(args):
    """Render storage-daemon.yaml with the gcloud image tag and apply it as a daemonset.

    Args:
        args: argparse namespace with the flags util.get_tag/pull_policy expect.
    """
    namespace = util.get_current_namespace()
    args.local = False  # so tag is for gcloud
    tag = util.get_tag(args, NAME, build)
    # Context manager closes the file handle promptly (previously leaked);
    # the unused `context` local was removed and `namespace` is now reused
    # instead of calling util.get_current_namespace() twice.
    with open('storage-daemon.yaml') as template_file:
        t = template_file.read()
    with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
        tmp.write(t.format(image=tag,
                           namespace=namespace,
                           pull_policy=util.pull_policy(args)))
        tmp.flush()
        util.update_daemonset(tmp.name)
def delete_instance_group(args):
    """Delete the managed instance group named args.name and its template."""
    base = '{}-{}-minion'.format(util.get_cluster_prefix(), args.name)
    # Delete the group first, then the template it was created from.
    util.run(['gcloud', '--quiet', 'compute', 'instance-groups', 'managed',
              'delete', base + '-group'])
    util.run(['gcloud', '--quiet', 'compute', 'instance-templates',
              'delete', base + '-template'])
def delete_cluster(args):
    """Tear down the whole cluster after an explicit interactive confirmation."""
    prompt = "**This is VERY dangerous. ** Delete Cluster '%s'? type 'yes sir' to delete it: " % util.get_kube_context()
    if input(prompt) != 'yes sir':
        print("canceling")
        return
    # IMPORTANT: shutdown all deployments *and* services first; otherwise we end up with
    # a random load balancer left laying around, which costs, and will never be used again.
    delete_all()
    env = {'KUBE_GCE_INSTANCE_PREFIX': util.get_cluster_prefix(),
           'KUBE_GCE_ZONE': args.zone}
    # Existing environment variables take precedence over the defaults above.
    env.update(os.environ)
    util.run(join(CLUSTER, 'kube-down.sh'), env=env)
def run_on_kubernetes(args):
    """Render storage-daemon.yaml and apply it as a daemonset.

    Args:
        args: argparse namespace with the flags util.get_tag/pull_policy expect.
    """
    namespace = util.get_current_namespace()
    tag = util.get_tag(args, NAME, build)
    # ensure there is a rethinkdb secret, even if blank, so that daemon will start with reduced functionality
    util.ensure_secret_exists('rethinkdb-password', 'rethinkdb')
    # Context manager closes the file handle promptly (previously leaked);
    # the unused `context` local was removed and `namespace` is now reused
    # instead of calling util.get_current_namespace() twice.
    with open('storage-daemon.yaml') as template_file:
        t = template_file.read()
    with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
        tmp.write(t.format(image=tag,
                           namespace=namespace,
                           pull_policy=util.pull_policy(args)))
        tmp.flush()
        util.update_daemonset(tmp.name)
def run_on_kubernetes(args):
    """Deploy this service: ensure prerequisites, render the template, apply it.

    Creates the kubectl secret, labels preemptible nodes, then renders the
    conf template with the gcloud image tag and updates the deployment.

    Args:
        args: argparse namespace with the flags util.get_tag/pull_policy expect.
    """
    create_kubectl_secret()
    label_preemptible_nodes()
    args.local = False  # so tag is for gcloud
    tag = util.get_tag(args, NAME, build)
    # Context manager closes the template handle promptly (previously leaked).
    with open(join('conf', '{name}.template.yaml'.format(name=NAME))) as template_file:
        t = template_file.read()
    with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
        tmp.write(t.format(image=tag,
                           cluster_prefix=util.get_cluster_prefix(),
                           node_selector=node_selector(),
                           pull_policy=util.pull_policy(args)))
        tmp.flush()
        util.update_deployment(tmp.name)
def run_on_kubernetes(args):
    """Render storage-daemon.yaml with the gcloud image tag and apply it as a daemonset.

    Args:
        args: argparse namespace with the flags util.get_tag/pull_policy expect.
    """
    namespace = util.get_current_namespace()
    args.local = False  # so tag is for gcloud
    tag = util.get_tag(args, NAME, build)
    # ensure there is a rethinkdb secret, even if blank, so that daemon will start with reduced functionality
    util.ensure_secret_exists('rethinkdb-password', 'rethinkdb')
    # Context manager closes the file handle promptly (previously leaked);
    # the unused `context` local was removed and `namespace` is now reused
    # instead of calling util.get_current_namespace() twice.
    with open('storage-daemon.yaml') as template_file:
        t = template_file.read()
    with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
        tmp.write(t.format(image=tag,
                           namespace=namespace,
                           pull_policy=util.pull_policy(args)))
        tmp.flush()
        util.update_daemonset(tmp.name)
def create_kubectl_secret():
    """
    Ensure that the kubectl secret needed for using kubectl instead of the pod
    to use this cluster/namespace exists.
    """
    if SECRET_NAME not in util.get_secrets():
        with tempfile.TemporaryDirectory() as tmp:
            target = join(tmp, 'config')
            config = json.loads(util.run(['kubectl', 'config', 'view', '--raw', '-o=json'],
                                         get_output=True, verbose=False))
            prefix = util.get_cluster_prefix()
            # Include only secret info that is relevant to this cluster (a mild
            # security measure -- we can't restrict namespace btw).
            for k in ['contexts', 'clusters', 'users']:
                config[k] = [x for x in config[k] if x['name'].endswith(prefix)]
            # Use the precomputed `target` path (previously computed but unused)
            # and a context manager so the file is closed/flushed before
            # create_secret reads the directory.
            with open(target, 'w') as f:
                f.write(yaml.dump(config))
            util.create_secret(SECRET_NAME, tmp)
def delete(args):
    """Stop the deployments for the given node numbers; optionally destroy disks.

    Obliterating disks requires both an explicit node list and that
    args.obliterate_disk equals "<cluster>-<namespace>" as a safety check.
    """
    if len(args.number) == 0:
        if args.obliterate_disk:
            raise ValueError("you must explicitly specify the nodes when using --obliterate-disk")
        args.number = all_node_numbers()
    for n in args.number:
        util.stop_deployment("{name}{number}".format(name=NAME, number=n))
    if args.obliterate_disk and args.number:
        context = util.get_cluster_prefix()
        namespace = util.get_current_namespace()
        expected = "%s-%s" % (context, namespace)
        if args.obliterate_disk != expected:
            raise ValueError("to obliterate the disk you must do --obliterate-disk=%s" % expected)
        delete_persistent_disks(context, namespace, args.number)
def run_on_kubernetes(args):
    """Deploy this service for a validated project id.

    Renders the conf template with project, storage-server and disk-size
    settings and updates the deployment.

    Args:
        args: argparse namespace with .project_id, .storage_server,
              .disk_size and the flags util.get_tag/pull_policy expect.
    """
    validate_project_ids(args)
    context = util.get_cluster_prefix()
    namespace = util.get_current_namespace()
    args.local = False  # so tag is for gcloud
    tag = util.get_tag(args, NAME, build)
    # Context manager closes the template handle promptly (previously leaked).
    with open(join('conf', '{name}.template.yaml'.format(name=NAME))) as template_file:
        t = template_file.read()
    with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
        tmp.write(t.format(image=tag,
                           project_id=args.project_id,
                           namespace=namespace,
                           storage_server=args.storage_server,
                           disk_size=args.disk_size,
                           pull_policy=util.pull_policy(args)))
        tmp.flush()
        util.update_deployment(tmp.name)
def run_on_kubernetes(args):
    """Deploy this service for a validated project id.

    Renders the conf template with project, storage-server and disk-size
    settings and updates the deployment.

    Args:
        args: argparse namespace with .project_id, .storage_server,
              .disk_size and the flags util.get_tag/pull_policy expect.
    """
    validate_project_ids(args)
    context = util.get_cluster_prefix()
    namespace = util.get_current_namespace()
    args.local = False  # so tag is for gcloud
    tag = util.get_tag(args, NAME, build)
    # Context manager closes the template handle promptly (previously leaked).
    with open(join('conf', '{name}.template.yaml'.format(name=NAME))) as template_file:
        t = template_file.read()
    with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
        tmp.write(t.format(image=tag,
                           project_id=args.project_id,
                           namespace=namespace,
                           storage_server=args.storage_server,
                           disk_size=args.disk_size,
                           pull_policy=util.pull_policy(args)))
        tmp.flush()
        util.update_deployment(tmp.name)
def create_instance_group(args):
    """Create a new managed instance group in the existing cluster.

    With --cost, only print the estimated cost and return. Otherwise set up
    the environment expected by kube-add.sh and run it from SCRIPT_PATH.
    """
    if '_' in args.name:
        raise ValueError("name must not contain an underscore (_)")
    if args.min_nodes > args.max_nodes:
        args.max_nodes = args.min_nodes
    if args.cost:
        # Cost-only mode: report the estimate and do nothing else.
        estimate = cost_of_cluster(node_size=args.node_size,
                                   node_disk_type='pd-ssd' if args.node_ssd else 'pd-standard',
                                   node_disk_size=args.node_disk_size,
                                   min_nodes=args.min_nodes,
                                   max_nodes=args.max_nodes,
                                   preemptible=not args.non_preemptible)
        print(estimate)
        return
    if not args.name:
        raise RuntimeError("you must specify a name")
    prefix = util.get_cluster_prefix()
    # KUBE_USE_EXISTING_MASTER -- figured out by looking at
    # https://github.com/kubernetes/kubernetes/blob/master/cluster/gce/util.sh
    env = {
        'KUBERNETES_PROVIDER': 'gce',
        'KUBE_MASTER': prefix + '-master',
        'KUBE_ENABLE_CLUSTER_MONITORING': 'google',
        'KUBE_GCE_ZONE': args.zone,
        'NODE_SIZE': args.node_size,
        'NUM_NODES': str(args.min_nodes),
        'NODE_DISK_TYPE': 'pd-ssd' if args.node_ssd else 'pd-standard',
        'NODE_DISK_SIZE': "%sGB" % args.node_disk_size,
        'PREEMPTIBLE_NODE': 'false' if args.non_preemptible else 'true',
        'KUBE_GCE_INSTANCE_PREFIX': prefix,
        'NEW_GROUP_PREFIX': prefix + '-' + args.name,
        'KUBE_ENABLE_NODE_AUTOSCALER': 'true' if args.min_nodes < args.max_nodes else 'false',
        'KUBE_AUTOSCALER_MIN_NODES': str(args.min_nodes),
        'KUBE_AUTOSCALER_MAX_NODES': str(args.max_nodes),
        'KUBE_ROOT': KUBE_ROOT,  # required by the kube-add.sh script
    }
    # Existing environment variables take precedence over the defaults above.
    env.update(os.environ)
    # Copy over and run our own script for adding a new managed instance group!
    # I wrote this script based on reading the kubernetes shell scripts for hours... (ws)
    os.chdir(SCRIPT_PATH)
    util.run('./kube-add.sh', env=env)
def autoscale_cluster(args):
    """Configure autoscaling on a managed minion instance group.

    Clamps min_nodes down to max_nodes if needed, then issues a gcloud
    set-autoscaling command, optionally with CPU-based scaling.
    """
    # Keep the bounds consistent: min may never exceed max.
    if args.min_nodes is not None and args.max_nodes < args.min_nodes:
        args.min_nodes = args.max_nodes
    prefix = util.get_cluster_prefix()
    group = ('{prefix}-{name}-minion-group'.format(prefix=prefix, name=args.name)
             if args.name
             else '{prefix}-minion-group'.format(prefix=prefix))
    cmd = ['gcloud', 'compute', 'instance-groups', 'managed', 'set-autoscaling',
           group, '--max-num-replicas', str(args.max_nodes)]
    if args.min_nodes is not None:
        cmd += ['--min-num-replicas', str(args.min_nodes)]
    if args.cpu_percent is not None:
        cmd += ['--scale-based-on-cpu',
                '--target-cpu-utilization', str(args.cpu_percent / 100.0)]
    util.run(cmd)
def run_on_kubernetes(args):
    """Deploy this service, choosing the default project image from args.

    Uses args.project_tag when given; otherwise the most recent smc-project
    image in the gcloud repo.

    Args:
        args: argparse namespace with .project_tag and the flags
              util.get_tag/pull_policy expect.
    """
    create_kubectl_secret()
    args.local = False  # so tag is for gcloud
    tag = util.get_tag(args, NAME, build)
    # Context manager closes the template handle promptly (previously leaked).
    with open(join('conf', '{name}.template.yaml'.format(name=NAME))) as template_file:
        t = template_file.read()
    if args.project_tag:
        default_image = util.gcloud_docker_repo('smc-project:' + args.project_tag)
    else:
        default_image = util.gcloud_most_recent_image('smc-project')
    with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
        tmp.write(t.format(image=tag,
                           namespace=util.get_current_namespace(),
                           cluster_prefix=util.get_cluster_prefix(),
                           default_image=default_image,
                           node_selector=node_selector(),
                           pull_policy=util.pull_policy(args)))
        tmp.flush()
        util.update_deployment(tmp.name)
def delete(args):
    """Stop the deployments for the given node numbers; optionally destroy disks.

    Obliterating disks requires both an explicit node list and that
    args.obliterate_disk equals "<cluster>-<namespace>" as a safety check.
    """
    if len(args.number) == 0:
        if args.obliterate_disk:
            raise ValueError(
                "you must explicitly specify the nodes when using --obliterate-disk"
            )
        args.number = all_node_numbers()
    for n in args.number:
        util.stop_deployment("{name}{number}".format(name=NAME, number=n))
    if args.obliterate_disk and args.number:
        context = util.get_cluster_prefix()
        namespace = util.get_current_namespace()
        expected = "%s-%s" % (context, namespace)
        if args.obliterate_disk != expected:
            raise ValueError(
                "to obliterate the disk you must do --obliterate-disk=%s" % expected)
        delete_persistent_disks(context, namespace, args.number)
def run_on_kubernetes(args):
    """Deploy this service to the current cluster, one deployment per node.

    Ensures supporting services, the rethinkdb secret and per-node persistent
    disks exist, then renders and applies the conf template for each node.

    Args:
        args: argparse namespace with .number, .size, .type, .health_delay
              and the flags util.get_tag/pull_policy expect.
    """
    context = util.get_cluster_prefix()
    namespace = util.get_current_namespace()
    if len(args.number) == 0:
        # Figure out the nodes based on the names of persistent disks, or just node 0 if none.
        args.number = range(max(1, len(get_persistent_disks(context, namespace))))
    ensure_services_exist()
    util.ensure_secret_exists('rethinkdb-password', 'rethinkdb')
    args.local = False  # so tag is for gcloud
    tag = util.get_tag(args, NAME, build)
    # Context manager closes the template handle promptly (previously leaked).
    with open(join('conf', '{name}.template.yaml'.format(name=NAME))) as template_file:
        t = template_file.read()
    for number in args.number:
        ensure_persistent_disk_exists(context, namespace, number, args.size, args.type)
        with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
            tmp.write(t.format(image=tag,
                               number=number,
                               pd_name=pd_name(context=context, namespace=namespace, number=number),
                               health_delay=args.health_delay,
                               pull_policy=util.pull_policy(args)))
            tmp.flush()
            util.update_deployment(tmp.name)
def cluster_env(args, prefix=None):
    """Return the environment dict for the kubernetes GCE cluster scripts.

    See https://github.com/kubernetes/kubernetes/blob/master/cluster/gce/config-default.sh
    for the meaning of the variables. Values already present in os.environ
    take precedence over the computed defaults.
    """
    if prefix is None:
        prefix = util.get_cluster_prefix()
    ip_range = available_cluster_ip_range()
    autoscale = 'true' if args.min_nodes < args.max_nodes else 'false'
    env = {
        'KUBERNETES_PROVIDER': 'gce',
        'KUBE_MASTER': prefix + '-master',
        'KUBE_ENABLE_CLUSTER_MONITORING': 'google',
        'KUBE_GCE_ZONE': args.zone,
        'NODE_SIZE': args.node_size,
        'NUM_NODES': str(args.min_nodes),
        'NODE_DISK_TYPE': 'pd-ssd' if args.node_ssd else 'pd-standard',
        'NODE_DISK_SIZE': "%sGB" % args.node_disk_size,
        'PREEMPTIBLE_NODE': 'true' if args.preemptible else 'false',
        'KUBE_GCE_INSTANCE_PREFIX': prefix,
        'KUBE_ENABLE_NODE_AUTOSCALER': autoscale,
        'KUBE_AUTOSCALER_MIN_NODES': str(args.min_nodes),
        'KUBE_AUTOSCALER_MAX_NODES': str(args.max_nodes),
        'CLUSTER_IP_RANGE': ip_range,
        # gcloud compute images list --project google-containers
        'KUBE_GCE_MASTER_PROJECT': 'google-containers',
        'KUBE_OS_DISTRIBUTION': 'debian',
        'KUBE_GCE_MASTER_IMAGE': 'container-v1-3-v20160604',
        'KUBE_GCE_NODE_IMAGE': 'container-v1-3-v20160604',
        'KUBE_ROOT': KUBE_ROOT,
        # NOTE: an ubuntu-trusty image was tried here previously and did not
        # work (no DNS), so the debian container images above are used.
    }
    # Master sizing is only relevant for some callers; set it when available.
    if hasattr(args, 'master_size'):
        env['MASTER_SIZE'] = args.master_size
    if hasattr(args, 'master_disk_size'):
        env['MASTER_DISK_SIZE'] = "%sGB" % args.master_disk_size
    env.update(os.environ)
    return env
def run_on_kubernetes(args):
    """Deploy the storage service, one deployment (and disk) per node number.

    Ensures the gcloud secret, service and ssh prerequisites exist, then
    renders and applies the conf template for each node.

    Args:
        args: argparse namespace with .number, .size, .type, .health_delay,
              .tag and the flags util.get_tag/pull_policy expect.
    """
    create_gcloud_secret()
    context = util.get_cluster_prefix()
    namespace = util.get_current_namespace()
    if len(args.number) == 0:
        # Figure out the nodes based on the names of persistent disks, or just node 0 if none.
        args.number = range(
            max(1, len(get_persistent_disks(context, namespace))))
    if 'storage-projects' not in util.get_services():
        util.run(['kubectl', 'create', '-f', 'conf/service.yaml'])
    args.local = False  # so tag is for gcloud
    tag = util.get_tag(args, NAME, build)
    if not args.tag:
        tag = tag[:tag.rfind('-')]  # get rid of the final -[service] part of the tag.
    # Context manager closes the template handle promptly (previously leaked).
    with open(join('conf', '{name}.template.yaml'.format(name=NAME))) as template_file:
        t = template_file.read()
    ensure_ssh()
    for number in args.number:
        # NOTE: the original computed an unused deployment_name here; removed.
        ensure_persistent_disk_exists(context, namespace, number, args.size,
                                      args.type)
        with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as tmp:
            tmp.write(
                t.format(image=tag,
                         number=number,
                         gcloud_bucket=gcloud_bucket(namespace=namespace),
                         pd_name=pd_name(context=context,
                                         namespace=namespace,
                                         number=number),
                         health_delay=args.health_delay,
                         pull_policy=util.pull_policy(args)))
            tmp.flush()
            util.update_deployment(tmp.name)
def delete_instance_group(args):
    """Delete the managed instance group named args.name and its template."""
    base = '{}-{}-minion'.format(util.get_cluster_prefix(), args.name)
    # Delete the group first, then the template it was created from.
    util.run(['gcloud', '--quiet', 'compute', 'instance-groups', 'managed',
              'delete', base + '-group'])
    util.run(['gcloud', '--quiet', 'compute', 'instance-templates',
              'delete', base + '-template'])
def resize_cluster(args):
    """Resize the cluster's default minion instance group to args.size nodes."""
    group = util.get_cluster_prefix() + '-minion-group'
    cmd = ['gcloud', 'compute', 'instance-groups', 'managed', 'resize',
           group, '--size', str(args.size)]
    util.run(cmd)
def ssh(args):
    """Open tmux ssh sessions to cluster nodes, optionally filtered by group name."""
    nodes = util.get_nodes()
    if args.name:
        # Only keep nodes belonging to the named instance group.
        wanted = util.get_cluster_prefix() + '-' + args.name + '-'
        nodes = [node for node in nodes if node.startswith(wanted)]
    util.tmux_ssh(nodes, sync=not args.no_sync)