Example #1
def remove_node(ip, ntype):
    client, _ = util.init_k8s()

    # Look up the pod running at this IP and derive the EC2-internal hostname
    # of the underlying node.
    pod = util.get_pod_from_ip(client, ip)
    hostname = 'ip-%s.ec2.internal' % (ip.replace('.', '-'))

    prev_count = util.get_previous_count(client, ntype)

    # Delete the node and scale its instance group down by one.
    util.run_process(['./delete_node.sh', hostname, ntype, str(prev_count),
                      str(prev_count - 1)])
Example #2
def remove_node(ip, ntype):
    client, _ = util.init_k8s()

    pod = util.get_pod_from_ip(client, ip)
    hostname = 'ip-%s.ec2.internal' % (ip.replace('.', '-'))

    # Delete the pod and the Kubernetes node it runs on.
    podname = pod.metadata.name
    client.delete_namespaced_pod(name=podname,
                                 namespace=util.NAMESPACE,
                                 body=k8s.client.V1DeleteOptions())
    client.delete_node(name=hostname, body=k8s.client.V1DeleteOptions())

    # Scale the node's instance group down by one.
    prev_count = util.get_previous_count(client, ntype)
    util.run_process(['./modify_ig.sh', ntype, str(prev_count - 1)])
Example #3
def run():
    context = zmq.Context(1)
    client, apps_client = util.init_k8s()

    prefix = os.path.join(os.environ['HYDRO_HOME'], 'cluster/hydro/cluster')

    # Pull sockets that receive node addition and removal requests over IPC.
    node_add_socket = context.socket(zmq.PULL)
    node_add_socket.bind('ipc:///tmp/node_add')

    node_remove_socket = context.socket(zmq.PULL)
    node_remove_socket.bind('ipc:///tmp/node_remove')

    poller = zmq.Poller()
    poller.register(node_add_socket, zmq.POLLIN)
    poller.register(node_remove_socket, zmq.POLLIN)

    # Base KVS configuration file used when adding new nodes.
    cfile = '/hydro/anna/conf/kvs-base.yml'

    while True:
        socks = dict(poller.poll(timeout=1000))

        if node_add_socket in socks and socks[node_add_socket] == zmq.POLLIN:
            # Messages are of the form '<ntype>:<count>'.
            msg = node_add_socket.recv_string()
            args = msg.split(':')

            ntype = args[0]
            num = int(args[1])
            logging.info('Adding %d new %s node(s)...' % (num, ntype))

            add_nodes(client,
                      apps_client,
                      cfile, [ntype], [num],
                      prefix=prefix)
            logging.info('Successfully added %d %s node(s).' % (num, ntype))

        if node_remove_socket in socks and socks[node_remove_socket] == \
                zmq.POLLIN:
            # Messages are of the form '<ntype>:<ip>'.
            msg = node_remove_socket.recv_string()
            args = msg.split(':')

            ntype = args[0]
            ip = args[1]

            remove_node(ip, ntype)
            logging.info('Successfully removed node %s.' % (ip))
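
For illustration, here is a minimal sketch of how another process on the same machine could drive the loop above through its IPC sockets. The message formats ('<ntype>:<count>' for additions, '<ntype>:<ip>' for removals) follow directly from the parsing code in run(); the node type and IP values are placeholder assumptions.

import zmq

# Minimal sketch; assumes run() from Example #3 is already listening on the
# same host. 'memory' and the IP below are placeholder values.
context = zmq.Context(1)

# Request one additional node: messages are '<ntype>:<count>'.
add_socket = context.socket(zmq.PUSH)
add_socket.connect('ipc:///tmp/node_add')
add_socket.send_string('memory:1')

# Request removal of the node at a given IP: messages are '<ntype>:<ip>'.
remove_socket = context.socket(zmq.PUSH)
remove_socket.connect('ipc:///tmp/node_remove')
remove_socket.send_string('memory:10.0.0.12')
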
Example #4
def run(self_ip):
    context = zmq.Context(1)

    pusher_cache = SocketCache(context, zmq.PUSH)

    restart_pull_socket = context.socket(zmq.REP)
    restart_pull_socket.bind('tcp://*:7000')

    churn_pull_socket = context.socket(zmq.PULL)
    churn_pull_socket.bind('tcp://*:7001')

    list_executors_socket = context.socket(zmq.PULL)
    list_executors_socket.bind('tcp://*:7002')

    function_status_socket = context.socket(zmq.PULL)
    function_status_socket.bind('tcp://*:7003')

    list_schedulers_socket = context.socket(zmq.REP)
    list_schedulers_socket.bind('tcp://*:7004')

    executor_depart_socket = context.socket(zmq.PULL)
    executor_depart_socket.bind('tcp://*:7005')

    statistics_socket = context.socket(zmq.PULL)
    statistics_socket.bind('tcp://*:7006')

    pin_accept_socket = context.socket(zmq.PULL)
    pin_accept_socket.setsockopt(zmq.RCVTIMEO, 10000) # 10 seconds.
    pin_accept_socket.bind('tcp://*:' + PIN_ACCEPT_PORT)

    poller = zmq.Poller()
    poller.register(restart_pull_socket, zmq.POLLIN)
    poller.register(churn_pull_socket, zmq.POLLIN)
    poller.register(function_status_socket, zmq.POLLIN)
    poller.register(list_executors_socket, zmq.POLLIN)
    poller.register(list_schedulers_socket, zmq.POLLIN)
    poller.register(executor_depart_socket, zmq.POLLIN)
    poller.register(statistics_socket, zmq.POLLIN)

    add_push_socket = context.socket(zmq.PUSH)
    add_push_socket.connect('ipc:///tmp/node_add')

    remove_push_socket = context.socket(zmq.PUSH)
    remove_push_socket.connect('ipc:///tmp/node_remove')

    client, _ = util.init_k8s()

    scaler = DefaultScaler(self_ip, context, add_push_socket,
                           remove_push_socket, pin_accept_socket)
    policy = DefaultHydroPolicy(scaler)

    # Tracks the self-reported statuses of each executor thread in the system.
    executor_statuses = {}

    # Tracks which executors are departing. This is used to ensure that all
    # threads on a node acknowledge that they are finished before we remove
    # the node from the system.
    departing_executors = {}

    # Tracks how often each function is called.
    function_frequencies = {}

    # Tracks the aggregated runtime for each function.
    function_runtimes = {}

    # Tracks the arrival times of DAG requests.
    arrival_times = {}

    # Tracks how often each DAG is called.
    dag_frequencies = {}

    # Tracks how long each DAG request spends in the system, end to end.
    dag_runtimes = {}

    start = time.time()
    while True:
        socks = dict(poller.poll(timeout=1000))

        if (churn_pull_socket in socks and socks[churn_pull_socket] ==
                zmq.POLLIN):
            msg = churn_pull_socket.recv_string()
            args = msg.split(':')

            if args[0] == 'add':
                scaler.add_vms(args[2], args[1])
            elif args[0] == 'remove':
                scaler.remove_vms(args[2], args[1])

        if (restart_pull_socket in socks and socks[restart_pull_socket] ==
                zmq.POLLIN):
            msg = restart_pull_socket.recv_string()
            args = msg.split(':')

            pod = util.get_pod_from_ip(client, args[1])
            count = str(pod.status.container_statuses[0].restart_count)

            restart_pull_socket.send_string(count)

        if (list_executors_socket in socks and socks[list_executors_socket] ==
                zmq.POLLIN):
            # The message's content is the IP address to which the list of
            # executor IPs should be sent.
            response_ip = list_executors_socket.recv_string()

            ips = StringSet()
            for ip in util.get_pod_ips(client, 'role=function'):
                ips.keys.append(ip)
            for ip in util.get_pod_ips(client, 'role=gpu'):
                ips.keys.append(ip)

            sckt = pusher_cache.get(response_ip)
            sckt.send(ips.SerializeToString())

        if (function_status_socket in socks and
                socks[function_status_socket] == zmq.POLLIN):
            # Dequeue all available ThreadStatus messages rather than handling
            # them one per poll cycle; this prevents starvation if other
            # operations (e.g., pin) take a long time.
            while True:
                status = ThreadStatus()
                try:
                    status.ParseFromString(
                        function_status_socket.recv(zmq.DONTWAIT))
                except zmq.ZMQError:
                    break  # We've run out of messages.

                key = (status.ip, status.tid)

                # If this executor is currently departing, ignore its status
                # updates so that utilization is not skewed downwards. We might
                # still receive such a message because the depart notification
                # may not have arrived when this status was sent.
                if key[0] in departing_executors:
                    continue

                executor_statuses[key] = status
                # logging.info(('Received thread status update from %s:%d: %.4f ' +
                #               'occupancy, %d functions pinned') %
                #              (status.ip, status.tid, status.utilization,
                #               len(status.functions)))
                logging.info(f"Functions {status.functions} is placed on node "
                             f"{status.ip}:{status.tid}")

        if (list_schedulers_socket in socks and
                socks[list_schedulers_socket] == zmq.POLLIN):
            # We can safely ignore this message's contents, and the response
            # does not depend on it.
            list_schedulers_socket.recv_string()

            ips = StringSet()
            for ip in util.get_pod_ips(client, 'role=scheduler'):
                ips.keys.append(ip)

            list_schedulers_socket.send(ips.SerializeToString())

        if (executor_depart_socket in socks and
                socks[executor_depart_socket] == zmq.POLLIN):
            ip = executor_depart_socket.recv_string()
            departing_executors[ip] -= 1

            # We wait until all the threads at this executor have acknowledged
            # that they are ready to leave, and we then remove the VM from the
            # system.
            if departing_executors[ip] == 0:
                logging.info('Removing node with ip %s' % ip)
                scaler.remove_vms('function', ip)
                del departing_executors[ip]

        if (statistics_socket in socks and
                socks[statistics_socket] == zmq.POLLIN):
            stats = ExecutorStatistics()
            stats.ParseFromString(statistics_socket.recv())

            # Aggregates statistics reported for individual functions including
            # call frequencies, processed requests, and total runtimes.
            for fstats in stats.functions:
                fname = fstats.name

                if fname not in function_frequencies:
                    function_frequencies[fname] = 0

                if fname not in function_runtimes:
                    function_runtimes[fname] = (0.0, 0)

                if fstats.runtime:
                    old_latency = function_runtimes[fname]

                    # This tracks the total runtime across all of the
                    # function's processed calls and the number of calls
                    # processed.
                    function_runtimes[fname] = (
                          old_latency[0] + sum(fstats.runtime),
                          old_latency[1] + fstats.call_count)
                else:
                    # This tracks how many calls are made to the function.
                    function_frequencies[fname] += fstats.call_count

            # Aggregates statistics for DAG requests, including call
            # frequencies, arrival rates, and end-to-end runtimes.
            for dstats in stats.dags:
                dname = dstats.name

                # Tracks the interarrival times of requests to this DAG as
                # perceived by the scheduler.
                if dname not in arrival_times:
                    arrival_times[dname] = []

                arrival_times[dname] += list(dstats.interarrival)

                # Tracks how many calls to this DAG were received.
                if dname not in dag_frequencies:
                    dag_frequencies[dname] = 0

                dag_frequencies[dname] += dstats.call_count

                # Tracks the end-to-end runtime of individual requests
                # completed in the last epoch.
                if dname not in dag_runtimes:
                    dag_runtimes[dname] = []

                for rt in dstats.runtimes:
                    dag_runtimes[dname].append(rt)

        end = time.time()
        if end - start > REPORT_PERIOD:
            logging.info('Checking hash ring...')
            check_hash_ring(client, context)

            # Invoke the configured policy to check system load and respond
            # appropriately.
            policy.replica_policy(function_frequencies, function_runtimes,
                                  dag_runtimes, executor_statuses,
                                  arrival_times)
            # TODO(simon): this turns off the node scaling policy, which is
            # what we want for a static experimental environment.
            # policy.executor_policy(executor_statuses, departing_executors)

            # Clears all metadata that was passed in for this epoch.
            function_runtimes.clear()
            function_frequencies.clear()
            dag_runtimes.clear()
            arrival_times.clear()

            # Restart the timer for the next reporting epoch.
            start = time.time()
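
As a usage illustration, the sketch below queries the restart-count endpoint on port 7000 of the management server above. Only the fact that the second ':'-delimited field is read as a pod IP is visible in the snippet, so the 'restart' command prefix and both IP addresses are placeholder assumptions.

import zmq

MGMT_IP = '10.0.0.5'   # assumed management-server IP
POD_IP = '10.0.1.23'   # assumed pod IP whose restart count we want

context = zmq.Context(1)
sock = context.socket(zmq.REQ)
sock.connect('tcp://%s:7000' % MGMT_IP)

# The server splits the request on ':' and treats the second field as the
# pod IP; 'restart' is just an assumed placeholder for the first field.
sock.send_string('restart:%s' % POD_IP)
print('Restart count:', sock.recv_string())
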
Example #5
def create_cluster(mem_count, ebs_count, func_count, sched_count, route_count,
                   bench_count, cfile, ssh_key, cluster_name, kops_bucket,
                   aws_key_id, aws_key):

    if 'HYDRO_HOME' not in os.environ:
        raise ValueError('HYDRO_HOME environment variable must be set to ' +
                         'the directory where all Hydro project repos are ' +
                         'located.')
    prefix = os.path.join(os.environ['HYDRO_HOME'], 'cluster/hydro/cluster')

    util.run_process(['./create_cluster_object.sh', kops_bucket, ssh_key])

    client, apps_client = util.init_k8s()

    print('Creating management pods...')
    management_spec = util.load_yaml('yaml/pods/management-pod.yml', prefix)
    env = management_spec['spec']['containers'][0]['env']

    util.replace_yaml_val(env, 'AWS_ACCESS_KEY_ID', aws_key_id)
    util.replace_yaml_val(env, 'AWS_SECRET_ACCESS_KEY', aws_key)
    util.replace_yaml_val(env, 'KOPS_STATE_STORE', kops_bucket)
    util.replace_yaml_val(env, 'HYDRO_CLUSTER_NAME', cluster_name)

    client.create_namespaced_pod(namespace=util.NAMESPACE,
                                 body=management_spec)

    # Wait until the management pod is running before moving forward, because
    # other pods depend on knowing the management pod's IP address.
    management_ip = util.get_pod_ips(client,
                                     'role=management',
                                     is_running=True)[0]

    # Copy the kube config file to the management pod so it can execute
    # kubectl commands, along with the SSH keys and the KVS config.
    management_podname = management_spec['metadata']['name']
    kcname = management_spec['spec']['containers'][0]['name']

    os.system('cp %s anna-config.yml' % cfile)
    kubecfg = os.path.join(os.environ['HOME'], '.kube/config')
    util.copy_file_to_pod(client, kubecfg, management_podname, '/root/.kube/',
                          kcname)
    util.copy_file_to_pod(client, ssh_key, management_podname, '/root/.ssh/',
                          kcname)
    util.copy_file_to_pod(client, ssh_key + '.pub', management_podname,
                          '/root/.ssh/', kcname)
    util.copy_file_to_pod(client, 'anna-config.yml', management_podname,
                          '/hydro/anna/conf/', kcname)

    # Start the monitoring pod.
    mon_spec = util.load_yaml('yaml/pods/monitoring-pod.yml', prefix)
    util.replace_yaml_val(mon_spec['spec']['containers'][0]['env'], 'MGMT_IP',
                          management_ip)
    client.create_namespaced_pod(namespace=util.NAMESPACE, body=mon_spec)

    # Wait until the monitoring pod has finished being created to get its IP
    # address, then copy the KVS config into the monitoring pod.
    util.get_pod_ips(client, 'role=monitoring')
    util.copy_file_to_pod(client, 'anna-config.yml',
                          mon_spec['metadata']['name'], '/hydro/anna/conf/',
                          mon_spec['spec']['containers'][0]['name'])
    os.system('rm anna-config.yml')

    print('Creating %d routing nodes...' % (route_count))
    add_nodes(client, apps_client, cfile, ['routing'], [route_count], True,
              prefix)
    util.get_pod_ips(client, 'role=routing')

    print('Creating %d memory, %d ebs node(s)...' % (mem_count, ebs_count))
    add_nodes(client, apps_client, cfile, ['memory', 'ebs'],
              [mem_count, ebs_count], True, prefix)

    print('Creating routing service...')
    service_spec = util.load_yaml('yaml/services/routing.yml', prefix)
    client.create_namespaced_service(namespace=util.NAMESPACE,
                                     body=service_spec)

    print('Adding %d scheduler nodes...' % (sched_count))
    add_nodes(client, apps_client, cfile, ['scheduler'], [sched_count], True,
              prefix)
    util.get_pod_ips(client, 'role=scheduler')

    print('Adding %d function serving nodes...' % (func_count))
    add_nodes(client, apps_client, cfile, ['function'], [func_count], True,
              prefix)

    print('Creating function service...')
    service_spec = util.load_yaml('yaml/services/function.yml', prefix)
    client.create_namespaced_service(namespace=util.NAMESPACE,
                                     body=service_spec)

    print('Adding %d benchmark nodes...' % (bench_count))
    add_nodes(client, apps_client, cfile, ['benchmark'], [bench_count], True,
              prefix)

    print('Finished creating all pods...')
    os.system('touch setup_complete')
    util.copy_file_to_pod(client, 'setup_complete', management_podname,
                          '/hydro', kcname)
    os.system('rm setup_complete')

    # ec2_client is presumably a module-level boto3 EC2 client; it is not
    # defined in this snippet.
    sg_name = 'nodes.' + cluster_name
    sg = ec2_client.describe_security_groups(Filters=[{
        'Name': 'group-name',
        'Values': [sg_name]
    }])['SecurityGroups'][0]

    print('Authorizing ports for routing service...')

    permission = [{
        'FromPort': 6200,
        'IpProtocol': 'tcp',
        'ToPort': 6203,
        'IpRanges': [{
            'CidrIp': '0.0.0.0/0'
        }]
    }]

    ec2_client.authorize_security_group_ingress(GroupId=sg['GroupId'],
                                                IpPermissions=permission)

    routing_svc_addr = util.get_service_address(client, 'routing-service')
    function_svc_addr = util.get_service_address(client, 'function-service')
    print('The routing service can be accessed here: \n\t%s' %
          (routing_svc_addr))
    print('The function service can be accessed here: \n\t%s' %
          (function_svc_addr))
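
For reference, a minimal invocation sketch for create_cluster follows. Every value below (node counts, paths, bucket and cluster names, credentials) is a placeholder assumption rather than a known-good configuration.

import os

# Minimal sketch; all values below are placeholder assumptions.
create_cluster(
    mem_count=3,        # memory-tier KVS nodes
    ebs_count=0,        # EBS-tier KVS nodes
    func_count=2,       # function-serving nodes
    sched_count=1,      # scheduler nodes
    route_count=1,      # routing nodes
    bench_count=0,      # benchmark nodes
    cfile='anna-config.yml',                              # KVS config file
    ssh_key=os.path.join(os.environ['HOME'], '.ssh/id_rsa'),
    cluster_name='hydro.example.k8s.local',
    kops_bucket='example-kops-state-store',
    aws_key_id=os.environ['AWS_ACCESS_KEY_ID'],
    aws_key=os.environ['AWS_SECRET_ACCESS_KEY'])
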