def remove_node(ip, ntype):
    """Remove the node with address `ip` by running `./delete_node.sh`.

    The node's hostname is derived from the IP as `ip-<a-b-c-d>.ec2.internal`.
    The script receives the hostname, the node type, the count reported by
    `util.get_previous_count` for that type, and that count minus one.

    Args:
        ip: Dotted IP address string of the node to remove.
        ntype: Node type string, forwarded to the count lookup and the script.
    """
    k8s_client, _ = util.init_k8s()

    # The pod is looked up but its result is not used by this variant; the
    # call is kept so the sequence of remote calls stays the same.
    util.get_pod_from_ip(k8s_client, ip)

    dashed_ip = ip.replace('.', '-')
    node_hostname = 'ip-%s.ec2.internal' % (dashed_ip)

    current_count = util.get_previous_count(k8s_client, ntype)
    target_count = current_count - 1

    command = [
        './delete_node.sh',
        node_hostname,
        ntype,
        str(current_count),
        str(target_count),
    ]
    util.run_process(command)
def remove_node(ip, ntype):
    """Delete the pod and node at `ip`, then shrink the instance group.

    Steps, in order: delete the pod found for `ip` from `util.NAMESPACE`,
    delete the node named `ip-<a-b-c-d>.ec2.internal`, then run
    `./modify_ig.sh` with the node type and the count reported by
    `util.get_previous_count` minus one.

    Args:
        ip: Dotted IP address string of the node to remove.
        ntype: Node type string, forwarded to the count lookup and the script.
    """
    k8s_client, _ = util.init_k8s()

    target_pod = util.get_pod_from_ip(k8s_client, ip)
    node_hostname = 'ip-%s.ec2.internal' % (ip.replace('.', '-'))

    # Remove the pod first, then the node it was running on.
    k8s_client.delete_namespaced_pod(
        name=target_pod.metadata.name,
        namespace=util.NAMESPACE,
        body=k8s.client.V1DeleteOptions(),
    )
    k8s_client.delete_node(
        name=node_hostname,
        body=k8s.client.V1DeleteOptions(),
    )

    # The count is read only after both deletions have been issued.
    current_count = util.get_previous_count(k8s_client, ntype)
    new_count = str(current_count - 1)
    util.run_process(['./modify_ig.sh', ntype, new_count])
def run():
    """Serve node add/remove requests from two local IPC sockets, forever.

    Binds PULL sockets at `ipc:///tmp/node_add` and `ipc:///tmp/node_remove`
    and polls both with a one-second timeout. Messages are colon-separated:

    * add:    `<ntype>:<num>` -> `add_nodes` is called for `num` nodes.
    * remove: `<ntype>:<ip>`  -> `remove_node` is called for that IP.

    Requires the `HYDRO_HOME` environment variable. This function never
    returns.
    """
    zmq_ctx = zmq.Context(1)
    k8s_client, k8s_apps_client = util.init_k8s()

    cluster_dir = os.path.join(os.environ['HYDRO_HOME'],
                               'cluster/hydro/cluster')

    add_sock = zmq_ctx.socket(zmq.PULL)
    add_sock.bind('ipc:///tmp/node_add')

    remove_sock = zmq_ctx.socket(zmq.PULL)
    remove_sock.bind('ipc:///tmp/node_remove')

    poller = zmq.Poller()
    for sock in (add_sock, remove_sock):
        poller.register(sock, zmq.POLLIN)

    kvs_config = '/hydro/anna/conf/kvs-base.yml'

    while True:
        ready = dict(poller.poll(timeout=1000))

        # Request to add nodes: "<ntype>:<num>".
        if ready.get(add_sock) == zmq.POLLIN:
            fields = add_sock.recv_string().split(':')
            ntype = fields[0]
            count = int(fields[1])

            logging.info('Adding %d new %s node(s)...' % (count, ntype))
            add_nodes(k8s_client, k8s_apps_client, kvs_config, [ntype],
                      [count], prefix=cluster_dir)
            logging.info('Successfully added %d %s node(s).' % (count, ntype))

        # Request to remove a node: "<ntype>:<ip>".
        if ready.get(remove_sock) == zmq.POLLIN:
            fields = remove_sock.recv_string().split(':')
            ntype = fields[0]
            ip = fields[1]

            remove_node(ip, ntype)
            logging.info('Successfully removed node %s.' % (ip))
def run(self_ip):
    """Run the management server's main event loop; never returns.

    Binds one ZeroMQ socket per request type and polls them with a one-second
    timeout:

    * 7000 (REP)  restart count: the second ':'-separated field of the
      request is a pod IP; replies with the restart count of that pod's
      first container.
    * 7001 (PULL) churn: `add:<x>:<y>` / `remove:<x>:<y>`, forwarded to the
      scaler as `add_vms(y, x)` / `remove_vms(y, x)`.
    * 7002 (PULL) list executors: the message body is the address the
      serialized `StringSet` of function and GPU pod IPs is pushed to.
    * 7003 (PULL) thread status: `ThreadStatus` updates from executors.
    * 7004 (REP)  list schedulers: replies with the scheduler pod IPs.
    * 7005 (PULL) executor departure acknowledgements.
    * 7006 (PULL) `ExecutorStatistics` reports.

    Whenever more than `REPORT_PERIOD` seconds have passed since the last
    report, the hash ring is checked, the replica policy is invoked with the
    statistics gathered so far, and the per-epoch statistics are cleared.

    Args:
        self_ip: This server's own IP address, handed to the scaler.
    """
    context = zmq.Context(1)

    pusher_cache = SocketCache(context, zmq.PUSH)

    restart_pull_socket = context.socket(zmq.REP)
    restart_pull_socket.bind('tcp://*:7000')

    churn_pull_socket = context.socket(zmq.PULL)
    churn_pull_socket.bind('tcp://*:7001')

    list_executors_socket = context.socket(zmq.PULL)
    list_executors_socket.bind('tcp://*:7002')

    function_status_socket = context.socket(zmq.PULL)
    function_status_socket.bind('tcp://*:7003')

    list_schedulers_socket = context.socket(zmq.REP)
    list_schedulers_socket.bind('tcp://*:7004')

    executor_depart_socket = context.socket(zmq.PULL)
    executor_depart_socket.bind('tcp://*:7005')

    statistics_socket = context.socket(zmq.PULL)
    statistics_socket.bind('tcp://*:7006')

    # Not polled here; it is handed to the scaler below.
    pin_accept_socket = context.socket(zmq.PULL)
    pin_accept_socket.setsockopt(zmq.RCVTIMEO, 10000)  # 10 seconds.
    pin_accept_socket.bind('tcp://*:' + PIN_ACCEPT_PORT)

    poller = zmq.Poller()
    poller.register(restart_pull_socket, zmq.POLLIN)
    poller.register(churn_pull_socket, zmq.POLLIN)
    poller.register(function_status_socket, zmq.POLLIN)
    poller.register(list_executors_socket, zmq.POLLIN)
    poller.register(list_schedulers_socket, zmq.POLLIN)
    poller.register(executor_depart_socket, zmq.POLLIN)
    poller.register(statistics_socket, zmq.POLLIN)

    add_push_socket = context.socket(zmq.PUSH)
    add_push_socket.connect('ipc:///tmp/node_add')

    remove_push_socket = context.socket(zmq.PUSH)
    remove_push_socket.connect('ipc:///tmp/node_remove')

    client, _ = util.init_k8s()

    scaler = DefaultScaler(self_ip, context, add_push_socket,
                           remove_push_socket, pin_accept_socket)
    policy = DefaultHydroPolicy(scaler)

    # Tracks the self-reported statuses of each executor thread in the system.
    executor_statuses = {}

    # Tracks which executors are departing. This is used to ensure all
    # threads acknowledge that they are finished before we remove a thread
    # from the system.
    departing_executors = {}

    # Tracks how often each function is called.
    function_frequencies = {}

    # Tracks the aggregated runtime for each function.
    function_runtimes = {}

    # Tracks the arrival times of DAG requests.
    arrival_times = {}

    # Tracks how often each DAG is called.
    # NOTE(review): this is accumulated below but neither passed to the
    # policy nor cleared at the end of an epoch -- confirm that is intended.
    dag_frequencies = {}

    # Tracks how long each DAG request spends in the system, end to end.
    dag_runtimes = {}

    start = time.time()
    while True:
        socks = dict(poller.poll(timeout=1000))

        if socks.get(churn_pull_socket) == zmq.POLLIN:
            msg = churn_pull_socket.recv_string()
            args = msg.split(':')

            if args[0] == 'add':
                scaler.add_vms(args[2], args[1])
            elif args[0] == 'remove':
                scaler.remove_vms(args[2], args[1])

        if socks.get(restart_pull_socket) == zmq.POLLIN:
            msg = restart_pull_socket.recv_string()
            args = msg.split(':')

            pod = util.get_pod_from_ip(client, args[1])
            count = str(pod.status.container_statuses[0].restart_count)

            restart_pull_socket.send_string(count)

        if socks.get(list_executors_socket) == zmq.POLLIN:
            # The message body is the address the response is pushed to; the
            # contents of the response do not otherwise depend on it.
            response_ip = list_executors_socket.recv_string()

            ips = StringSet()
            ips.keys.extend(util.get_pod_ips(client, 'role=function'))
            ips.keys.extend(util.get_pod_ips(client, 'role=gpu'))

            sckt = pusher_cache.get(response_ip)
            sckt.send(ips.SerializeToString())

        if socks.get(function_status_socket) == zmq.POLLIN:
            # Dequeue all available ThreadStatus messages rather than doing
            # them one at a time---this prevents starvation if other
            # operations (e.g., pin) take a long time.
            while True:
                try:
                    payload = function_status_socket.recv(zmq.DONTWAIT)
                except zmq.Again:
                    break  # We've run out of messages.

                status = ThreadStatus()
                try:
                    status.ParseFromString(payload)
                except Exception:
                    # A malformed update is dropped, but it must not hide
                    # interpreter-level signals such as KeyboardInterrupt.
                    logging.exception('Dropping unparseable thread status '
                                      'message.')
                    continue

                key = (status.ip, status.tid)

                # If this executor is one of the ones that's currently
                # departing, we can just ignore its status updates since we
                # don't want utilization to be skewed downwards. The reason
                # we might still receive this message is because the depart
                # message may not have arrived when this was sent.
                if key[0] in departing_executors:
                    continue

                executor_statuses[key] = status
                logging.info(f"Functions {status.functions} is placed on node "
                             f"{status.ip}:{status.tid}")

        if socks.get(list_schedulers_socket) == zmq.POLLIN:
            # We can safely ignore this message's contents, and the response
            # does not depend on it.
            list_schedulers_socket.recv_string()

            ips = StringSet()
            ips.keys.extend(util.get_pod_ips(client, 'role=scheduler'))

            list_schedulers_socket.send(ips.SerializeToString())

        if socks.get(executor_depart_socket) == zmq.POLLIN:
            ip = executor_depart_socket.recv_string()

            if ip not in departing_executors:
                # Nothing in this loop registers departures while the
                # executor policy below is disabled, so an unexpected notice
                # must not bring the whole server down with a KeyError.
                logging.warning('Ignoring departure notice from untracked '
                                'executor %s' % ip)
            else:
                departing_executors[ip] -= 1

                # We wait until all the threads at this executor have
                # acknowledged that they are ready to leave, and we then
                # remove the VM from the system.
                if departing_executors[ip] == 0:
                    logging.info('Removing node with ip %s' % ip)
                    scaler.remove_vms('function', ip)
                    del departing_executors[ip]

        if socks.get(statistics_socket) == zmq.POLLIN:
            stats = ExecutorStatistics()
            stats.ParseFromString(statistics_socket.recv())

            # Aggregates statistics reported for individual functions
            # including call frequencies, processed requests, and total
            # runtimes.
            for fstats in stats.functions:
                fname = fstats.name

                function_frequencies.setdefault(fname, 0)
                function_runtimes.setdefault(fname, (0.0, 0))

                if fstats.runtime:
                    # This tracks how many calls were processed for the
                    # function and the length of the total runtime of all
                    # calls.
                    total_runtime, processed = function_runtimes[fname]
                    function_runtimes[fname] = (
                        total_runtime + sum(fstats.runtime),
                        processed + fstats.call_count)
                else:
                    # This tracks how many calls are made to the function.
                    function_frequencies[fname] += fstats.call_count

            # Aggregates statistics for DAG requests, including call
            # frequencies, arrival rates, and end-to-end runtimes.
            for dstats in stats.dags:
                dname = dstats.name

                # Tracks the interarrival rates of requests to this DAG as
                # perceived by the scheduler.
                arrival_times.setdefault(dname, []).extend(
                    dstats.interarrival)

                # Tracks how many calls to this DAG were received.
                dag_frequencies[dname] = (dag_frequencies.get(dname, 0) +
                                          dstats.call_count)

                # Tracks the end-to-end runtime of individual requests
                # completed in the last epoch.
                dag_runtimes.setdefault(dname, []).extend(dstats.runtimes)

        end = time.time()
        if end - start > REPORT_PERIOD:
            logging.info('Checking hash ring...')
            check_hash_ring(client, context)

            # Invoke the configured policy to check system load and respond
            # appropriately.
            policy.replica_policy(function_frequencies, function_runtimes,
                                  dag_runtimes, executor_statuses,
                                  arrival_times)

            # TODO(simon): this turn off node scaling policy, which is what we
            # want for static exp env
            # policy.executor_policy(executor_statuses, departing_executors)

            # Clears all metadata that was passed in for this epoch.
            function_runtimes.clear()
            function_frequencies.clear()
            dag_runtimes.clear()
            arrival_times.clear()

            # Restart the timer for the next reporting epoch.
            start = time.time()
def create_cluster(mem_count, ebs_count, func_count, sched_count, route_count,
                   bench_count, cfile, ssh_key, cluster_name, kops_bucket,
                   aws_key_id, aws_key):
    """Create a full Hydro cluster: management, monitoring, and worker nodes.

    In order, this creates the cluster object, the management pod, the
    monitoring pod, routing nodes, memory/EBS nodes, the routing service,
    scheduler nodes, function nodes, the function service, and benchmark
    nodes. It then marks setup as complete on the management pod, opens TCP
    ports 6200-6203 on the `nodes.<cluster_name>` security group, and prints
    the routing and function service addresses.

    Args:
        mem_count: Number of memory nodes to create.
        ebs_count: Number of EBS nodes to create.
        func_count: Number of function serving nodes to create.
        sched_count: Number of scheduler nodes to create.
        route_count: Number of routing nodes to create.
        bench_count: Number of benchmark nodes to create.
        cfile: Path to the KVS config file copied into the management and
            monitoring pods and passed to `add_nodes`.
        ssh_key: Path to the SSH private key; `ssh_key + '.pub'` is copied
            alongside it.
        cluster_name: Cluster name, set in the management pod environment and
            used to find the security group.
        kops_bucket: Value for `KOPS_STATE_STORE`, also passed to
            `create_cluster_object.sh`.
        aws_key_id: Value for `AWS_ACCESS_KEY_ID` in the management pod.
        aws_key: Value for `AWS_SECRET_ACCESS_KEY` in the management pod.

    Raises:
        ValueError: If the `HYDRO_HOME` environment variable is not set.
        OSError: If `cfile` cannot be copied to the working directory.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    import shutil

    if 'HYDRO_HOME' not in os.environ:
        raise ValueError('HYDRO_HOME environment variable must be set to be '
                         + 'the directory where all Hydro project repos are '
                         + 'located.')
    prefix = os.path.join(os.environ['HYDRO_HOME'], 'cluster/hydro/cluster')

    util.run_process(['./create_cluster_object.sh', kops_bucket, ssh_key])

    client, apps_client = util.init_k8s()

    print('Creating management pods...')
    management_spec = util.load_yaml('yaml/pods/management-pod.yml', prefix)
    env = management_spec['spec']['containers'][0]['env']

    util.replace_yaml_val(env, 'AWS_ACCESS_KEY_ID', aws_key_id)
    util.replace_yaml_val(env, 'AWS_SECRET_ACCESS_KEY', aws_key)
    util.replace_yaml_val(env, 'KOPS_STATE_STORE', kops_bucket)
    util.replace_yaml_val(env, 'HYDRO_CLUSTER_NAME', cluster_name)

    client.create_namespaced_pod(namespace=util.NAMESPACE,
                                 body=management_spec)

    # Waits until the management pod starts to move forward -- we need to do
    # this because other pods depend on knowing the management pod's IP
    # address.
    management_ip = util.get_pod_ips(client, 'role=management',
                                     is_running=True)[0]

    # Copy kube config file to management pod, so it can execute kubectl
    # commands, in addition to SSH keys and KVS config.
    management_podname = management_spec['metadata']['name']
    kcname = management_spec['spec']['containers'][0]['name']

    # The KVS config is staged under a fixed name in the working directory.
    # Copying it in-process avoids shell-splitting paths with spaces and
    # fails loudly if the file is missing; the finally block guarantees the
    # scratch copy is removed even when a later step raises.
    kvs_conf_copy = 'anna-config.yml'
    shutil.copy(cfile, kvs_conf_copy)
    try:
        kubecfg = os.path.join(os.environ['HOME'], '.kube/config')
        util.copy_file_to_pod(client, kubecfg, management_podname,
                              '/root/.kube/', kcname)
        util.copy_file_to_pod(client, ssh_key, management_podname,
                              '/root/.ssh/', kcname)
        util.copy_file_to_pod(client, ssh_key + '.pub', management_podname,
                              '/root/.ssh/', kcname)
        util.copy_file_to_pod(client, kvs_conf_copy, management_podname,
                              '/hydro/anna/conf/', kcname)

        # Start the monitoring pod.
        mon_spec = util.load_yaml('yaml/pods/monitoring-pod.yml', prefix)
        util.replace_yaml_val(mon_spec['spec']['containers'][0]['env'],
                              'MGMT_IP', management_ip)
        client.create_namespaced_pod(namespace=util.NAMESPACE, body=mon_spec)

        # Wait until the monitoring pod is finished creating to get its IP
        # address and then copy KVS config into the monitoring pod.
        util.get_pod_ips(client, 'role=monitoring')
        util.copy_file_to_pod(client, kvs_conf_copy,
                              mon_spec['metadata']['name'],
                              '/hydro/anna/conf/',
                              mon_spec['spec']['containers'][0]['name'])
    finally:
        os.remove(kvs_conf_copy)

    print('Creating %d routing nodes...' % (route_count))
    add_nodes(client, apps_client, cfile, ['routing'], [route_count], True,
              prefix)
    util.get_pod_ips(client, 'role=routing')

    print('Creating %d memory, %d ebs node(s)...' % (mem_count, ebs_count))
    add_nodes(client, apps_client, cfile, ['memory', 'ebs'],
              [mem_count, ebs_count], True, prefix)

    print('Creating routing service...')
    service_spec = util.load_yaml('yaml/services/routing.yml', prefix)
    client.create_namespaced_service(namespace=util.NAMESPACE,
                                     body=service_spec)

    print('Adding %d scheduler nodes...' % (sched_count))
    add_nodes(client, apps_client, cfile, ['scheduler'], [sched_count], True,
              prefix)
    util.get_pod_ips(client, 'role=scheduler')

    print('Adding %d function serving nodes...' % (func_count))
    add_nodes(client, apps_client, cfile, ['function'], [func_count], True,
              prefix)

    print('Creating function service...')
    service_spec = util.load_yaml('yaml/services/function.yml', prefix)
    client.create_namespaced_service(namespace=util.NAMESPACE,
                                     body=service_spec)

    print('Adding %d benchmark nodes...' % (bench_count))
    add_nodes(client, apps_client, cfile, ['benchmark'], [bench_count], True,
              prefix)

    print('Finished creating all pods...')

    # Drop an empty marker file into the management pod to signal that setup
    # is complete, then remove the local scratch copy.
    marker = 'setup_complete'
    with open(marker, 'a'):
        pass
    try:
        util.copy_file_to_pod(client, marker, management_podname, '/hydro',
                              kcname)
    finally:
        os.remove(marker)

    sg_name = 'nodes.' + cluster_name
    sg = ec2_client.describe_security_groups(Filters=[{
        'Name': 'group-name',
        'Values': [sg_name]
    }])['SecurityGroups'][0]

    # Ports 6200-6203 are opened to every IPv4 address (0.0.0.0/0).
    print('Authorizing ports for routing service...')
    permission = [{
        'FromPort': 6200,
        'IpProtocol': 'tcp',
        'ToPort': 6203,
        'IpRanges': [{
            'CidrIp': '0.0.0.0/0'
        }]
    }]
    ec2_client.authorize_security_group_ingress(GroupId=sg['GroupId'],
                                                IpPermissions=permission)

    routing_svc_addr = util.get_service_address(client, 'routing-service')
    function_svc_addr = util.get_service_address(client, 'function-service')
    print('The routing service can be accessed here: \n\t%s' %
          (routing_svc_addr))
    print('The function service can be accessed here: \n\t%s' %
          (function_svc_addr))