Esempio n. 1
0
    def register_dag(self, name, functions, connections):
        '''
        Registers a new DAG with the system. This operation will fail if any of
        the functions provided cannot be identified in the system.

        name: A unique name for this DAG.
        functions: A list of names of functions to be included in this DAG.
        connections: A list of ordered pairs of function names that represent
        the edges in this DAG.
        '''

        flist = self._get_func_list()
        for fname in functions:
            if fname not in flist:
                logging.info(
                    'Function %s not registered. Please register before \
                    including it in a DAG.' % (fname))
                return False, None

        dag = Dag()
        dag.name = name
        dag.functions.extend(functions)
        for pair in connections:
            conn = dag.connections.add()
            conn.source = pair[0]
            conn.sink = pair[1]

        self.dag_create_sock.send(dag.SerializeToString())

        r = GenericResponse()
        r.ParseFromString(self.dag_create_sock.recv())

        return r.success, r.error
Esempio n. 2
0
def create_linear_dag(functions,
                      fnames,
                      kvs_client,
                      dname,
                      lattice_type=LWWPairLattice):
    dag = Dag()
    dag.name = dname

    prev = None

    for index, fname in enumerate(fnames):
        function = functions[index]
        create_function(function, kvs_client, fname, lattice_type)

        ref = dag.functions.add()
        ref.name = fname

        if prev:
            link = dag.connections.add()
            link.source = prev
            link.sink = fname

        prev = fname

    return dag
Esempio n. 3
0
def create_dag(dag_create_socket,
               pusher_cache,
               kvs,
               dags,
               policy,
               call_frequency,
               num_replicas=1):
    serialized = dag_create_socket.recv()

    dag = Dag()
    dag.ParseFromString(serialized)

    # We do not allow duplicate DAGs, so we return an error to the user if we
    # already know about this DAG.
    if dag.name in dags:
        sutils.error.error = DAG_ALREADY_EXISTS
        dag_create_socket.send(sutils.error.SerializeToString())
        return

    logging.info('Creating DAG %s.' % (dag.name))

    # We persist the DAG in the KVS, so other schedulers can read the DAG when
    # they hear about it.
    payload = LWWPairLattice(sutils.generate_timestamp(0), serialized)
    kvs.put(dag.name, payload)

    for fref in dag.functions:
        for _ in range(num_replicas):
            success = policy.pin_function(dag.name, fref)

            # The policy engine will only return False if it ran out of
            # resources on which to attempt to pin this function.
            if not success:
                logging.info(f'Creating DAG {dag.name} failed due to ' +
                             'insufficient resources.')
                sutils.error.error = NO_RESOURCES
                dag_create_socket.send(sutils.error.SerializeToString())

                # Unpin any previously pinned functions because the operation
                # failed.
                policy.discard_dag(dag, True)
                return

    # Only create this metadata after all functions have been successfully
    # created.
    for fref in dag.functions:
        if fref.name not in call_frequency:
            call_frequency[fref.name] = 0

    policy.commit_dag(dag.name)
    dags[dag.name] = (dag, utils.find_dag_source(dag))
    dag_create_socket.send(sutils.ok_resp)
Esempio n. 4
0
    def register_dag(self, name, functions, connections):
        '''
        Registers a new DAG with the system. This operation will fail if any of
        the functions provided cannot be identified in the system.

        name: A unique name for this DAG.
        functions: A list of names of functions to be included in this DAG.
        connections: A list of ordered pairs of function names that represent
        the edges in this DAG.
        '''

        flist = self._get_func_list()
        for fname in functions:
            if isinstance(fname, tuple):
                fname = fname[0]

            if fname not in flist:
                raise RuntimeError(
                    f'Function {fname} not registered. Please register before '
                    + 'including it in a DAG.')

        dag = Dag()
        dag.name = name
        for function in functions:
            ref = dag.functions.add()

            if type(function) == tuple:
                fname = function[0]
                invalids = function[1]
                ref.type = MULTIEXEC
            else:
                fname = function
                invalids = []

            ref.name = fname
            for invalid in invalids:
                ref.invalid_results.append(serializer.dump(invalid))

        for pair in connections:
            conn = dag.connections.add()
            conn.source = pair[0]
            conn.sink = pair[1]

        self.dag_create_sock.send(dag.SerializeToString())

        r = GenericResponse()
        r.ParseFromString(self.dag_create_sock.recv())

        return r.success, r.error
Esempio n. 5
0
    def _construct_dag_with_locations(self, source, sink):
        # Construct a simple, two-function DAG.
        dag = Dag()
        dag.name = 'dag'
        dag.functions.extend([source, sink])
        link = dag.connections.add()
        link.source = source
        link.sink = sink

        # Add the relevant metadata to the policy engine.
        source_address = (self.ip, 1)
        sink_address = (self.ip, 2)
        self.policy.function_locations[source] = {source_address}
        self.policy.function_locations[sink] = {sink_address}

        return dag, source_address, sink_address
Esempio n. 6
0
    def test_pin_reject(self):
        '''
        This test explicitly rejects a pin request from the policy and ensures
        that it tries again to pin on another node.
        '''
        # Create two unpinned executors.
        address_set = {(self.ip, 1), (self.ip, 2)}
        self.policy.unpinned_cpu_executors.update(address_set)

        # Create one failing and one successful response.
        self.pin_socket.inbox.append(sutils.ok_resp)
        self.pin_socket.inbox.append(sutils.error.SerializeToString())

        success = self.policy.pin_function(
            'dag', Dag.FunctionReference(name='function'), [])
        self.assertTrue(success)

        # Ensure that both remaining executors have been removed from unpinned
        # and that the DAG commit is pending.
        self.assertEqual(len(self.policy.unpinned_cpu_executors), 0)
        self.assertEqual(len(self.policy.pending_dags), 1)
Esempio n. 7
0
    def test_create_dag(self):
        '''
        This test creates a new DAG, checking that the correct pin messages are
        sent to executors and that it is persisted in the KVS correctly. It
        also checks that the server metadata was updated as expected.
        '''
        # Create a simple two-function DAG and add it to the inbound socket.
        source = 'source'
        sink = 'sink'
        dag_name = 'dag'

        dag = create_linear_dag([None, None], [source, sink], self.kvs_client,
                                dag_name)
        self.socket.inbox.append(dag.SerializeToString())

        # Add relevant metadata to the policy engine.
        address_set = {(self.ip, 1), (self.ip, 2)}
        self.policy.unpinned_executors.update(address_set)

        # Prepopulate the pin_accept socket with sufficient success messages.
        self.pin_socket.inbox.append(sutils.ok_resp)
        self.pin_socket.inbox.append(sutils.ok_resp)

        # Call the DAG creation method.
        dags = {}
        call_frequency = {}
        create_dag(self.socket, self.pusher_cache, self.kvs_client, dags,
                   self.policy, call_frequency)

        # Test that the correct metadata was created.
        self.assertTrue(dag_name in dags)
        created, dag_source = dags[dag_name]
        self.assertEqual(created, dag)
        self.assertEqual(len(dag_source), 1)
        self.assertEqual(list(dag_source)[0], source)
        self.assertTrue(source in call_frequency)
        self.assertTrue(sink in call_frequency)
        self.assertEqual(call_frequency[source], 0)
        self.assertEqual(call_frequency[sink], 0)

        # Test that the DAG is stored in the KVS correctly.
        result = self.kvs_client.get(dag_name)[dag_name]
        created = Dag()
        created.ParseFromString(result.reveal())
        self.assertEqual(created, dag)

        # Test that the correct response was returned to the user.
        self.assertTrue(len(self.socket.outbox), 1)
        response = GenericResponse()
        response.ParseFromString(self.socket.outbox.pop())
        self.assertTrue(response.success)

        # Test that the correct pin messages were sent.
        self.assertEqual(len(self.pusher_cache.socket.outbox), 2)
        messages = self.pusher_cache.socket.outbox
        function_set = {source, sink}
        for message in messages:
            self.assertTrue(':' in message)
            ip, fname = message.split(':')
            self.assertEqual(ip, self.ip)
            self.assertTrue(fname in function_set)
            function_set.discard(fname)

        self.assertEqual(len(function_set), 0)

        for address in address_set:
            self.assertTrue(
                get_pin_address(*address) in self.pusher_cache.addresses)

        # Test that the policy engine has the correct metadata stored.
        self.assertEqual(len(self.policy.unpinned_executors), 0)
        self.assertEqual(len(self.policy.pending_dags), 0)
        self.assertTrue(source in self.policy.function_locations)
        self.assertTrue(sink in self.policy.function_locations)

        self.assertEqual(len(self.policy.function_locations[source]), 1)
        self.assertEqual(len(self.policy.function_locations[sink]), 1)
Esempio n. 8
0
def scheduler(ip, mgmt_ip, route_addr):

    # If the management IP is not set, we are running in local mode.
    local = (mgmt_ip is None)
    kvs = AnnaTcpClient(route_addr, ip, local=local)

    scheduler_id = str(uuid.uuid4())

    context = zmq.Context(1)

    # A mapping from a DAG's name to its protobuf representation.
    dags = {}

    # Tracks how often a request for each function is received.
    call_frequency = {}

    # Tracks the time interval between successive requests for a particular
    # DAG.
    interarrivals = {}

    # Tracks the most recent arrival for each DAG -- used to calculate
    # interarrival times.
    last_arrivals = {}

    # Maintains a list of all other schedulers in the system, so we can
    # propagate metadata to them.
    schedulers = []

    connect_socket = context.socket(zmq.REP)
    connect_socket.bind(sutils.BIND_ADDR_TEMPLATE % (CONNECT_PORT))

    func_create_socket = context.socket(zmq.REP)
    func_create_socket.bind(sutils.BIND_ADDR_TEMPLATE % (FUNC_CREATE_PORT))

    func_call_socket = context.socket(zmq.REP)
    func_call_socket.bind(sutils.BIND_ADDR_TEMPLATE % (FUNC_CALL_PORT))

    dag_create_socket = context.socket(zmq.REP)
    dag_create_socket.bind(sutils.BIND_ADDR_TEMPLATE % (DAG_CREATE_PORT))

    dag_call_socket = context.socket(zmq.REP)
    dag_call_socket.bind(sutils.BIND_ADDR_TEMPLATE % (DAG_CALL_PORT))

    dag_delete_socket = context.socket(zmq.REP)
    dag_delete_socket.bind(sutils.BIND_ADDR_TEMPLATE % (DAG_DELETE_PORT))

    list_socket = context.socket(zmq.REP)
    list_socket.bind(sutils.BIND_ADDR_TEMPLATE % (LIST_PORT))

    exec_status_socket = context.socket(zmq.PULL)
    exec_status_socket.bind(sutils.BIND_ADDR_TEMPLATE % (sutils.STATUS_PORT))

    sched_update_socket = context.socket(zmq.PULL)
    sched_update_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                             (sutils.SCHED_UPDATE_PORT))

    pin_accept_socket = context.socket(zmq.PULL)
    pin_accept_socket.setsockopt(zmq.RCVTIMEO, 500)
    pin_accept_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                           (sutils.PIN_ACCEPT_PORT))

    requestor_cache = SocketCache(context, zmq.REQ)
    pusher_cache = SocketCache(context, zmq.PUSH)

    poller = zmq.Poller()
    poller.register(connect_socket, zmq.POLLIN)
    poller.register(func_create_socket, zmq.POLLIN)
    poller.register(func_call_socket, zmq.POLLIN)
    poller.register(dag_create_socket, zmq.POLLIN)
    poller.register(dag_call_socket, zmq.POLLIN)
    poller.register(dag_delete_socket, zmq.POLLIN)
    poller.register(list_socket, zmq.POLLIN)
    poller.register(exec_status_socket, zmq.POLLIN)
    poller.register(sched_update_socket, zmq.POLLIN)

    # Start the policy engine.
    policy = DefaultCloudburstSchedulerPolicy(pin_accept_socket,
                                              pusher_cache,
                                              kvs,
                                              ip,
                                              local=local)
    policy.update()

    start = time.time()

    while True:
        socks = dict(poller.poll(timeout=1000))

        if connect_socket in socks and socks[connect_socket] == zmq.POLLIN:
            msg = connect_socket.recv_string()
            connect_socket.send_string(route_addr)

        if (func_create_socket in socks
                and socks[func_create_socket] == zmq.POLLIN):
            create_function(func_create_socket, kvs)

        if func_call_socket in socks and socks[func_call_socket] == zmq.POLLIN:
            call_function(func_call_socket, pusher_cache, policy)

        if (dag_create_socket in socks
                and socks[dag_create_socket] == zmq.POLLIN):
            create_dag(dag_create_socket, pusher_cache, kvs, dags, policy,
                       call_frequency)

        if dag_call_socket in socks and socks[dag_call_socket] == zmq.POLLIN:
            call = DagCall()
            call.ParseFromString(dag_call_socket.recv())

            name = call.name

            t = time.time()
            if name in last_arrivals:
                if name not in interarrivals:
                    interarrivals[name] = []

                interarrivals[name].append(t - last_arrivals[name])

            last_arrivals[name] = t

            if name not in dags:
                resp = GenericResponse()
                resp.success = False
                resp.error = NO_SUCH_DAG

                dag_call_socket.send(resp.SerializeToString())
                continue

            dag = dags[name]
            for fname in dag[0].functions:
                call_frequency[fname.name] += 1

            response = call_dag(call, pusher_cache, dags, policy)
            dag_call_socket.send(response.SerializeToString())

        if (dag_delete_socket in socks
                and socks[dag_delete_socket] == zmq.POLLIN):
            delete_dag(dag_delete_socket, dags, policy, call_frequency)

        if list_socket in socks and socks[list_socket] == zmq.POLLIN:
            msg = list_socket.recv_string()
            prefix = msg if msg else ''

            resp = StringSet()
            resp.keys.extend(sched_utils.get_func_list(kvs, prefix))

            list_socket.send(resp.SerializeToString())

        if exec_status_socket in socks and socks[exec_status_socket] == \
                zmq.POLLIN:
            status = ThreadStatus()
            status.ParseFromString(exec_status_socket.recv())

            policy.process_status(status)

        if sched_update_socket in socks and socks[sched_update_socket] == \
                zmq.POLLIN:
            status = SchedulerStatus()
            status.ParseFromString(sched_update_socket.recv())

            # Retrieve any DAGs that some other scheduler knows about that we
            # do not yet know about.
            for dname in status.dags:
                if dname not in dags:
                    payload = kvs.get(dname)
                    while None in payload:
                        payload = kvs.get(dname)

                    dag = Dag()
                    dag.ParseFromString(payload[dname].reveal())
                    dags[dag.name] = (dag, sched_utils.find_dag_source(dag))

                    for fname in dag.functions:
                        if fname not in call_frequency:
                            call_frequency[fname] = 0

            policy.update_function_locations(status.function_locations)

        end = time.time()

        if end - start > METADATA_THRESHOLD:
            # Update the scheduler policy-related metadata.
            policy.update()

            # If the management IP is None, that means we arre running in
            # local mode, so there is no need to deal with caches and other
            # schedulers.
            if mgmt_ip:
                schedulers = sched_utils.get_ip_set(
                    sched_utils.get_scheduler_list_address(mgmt_ip),
                    requestor_cache, False)

        if end - start > REPORT_THRESHOLD:
            num_unique_executors = policy.get_unique_executors()
            key = scheduler_id + ':' + str(time.time())
            data = {'key': key, 'count': num_unique_executors}

            status = SchedulerStatus()
            for name in dags.keys():
                status.dags.append(name)

            for fname in policy.function_locations:
                for loc in policy.function_locations[fname]:
                    floc = status.function_locations.add()
                    floc.name = fname
                    floc.ip = loc[0]
                    floc.tid = loc[1]

            msg = status.SerializeToString()

            for sched_ip in schedulers:
                if sched_ip != ip:
                    sckt = pusher_cache.get(
                        sched_utils.get_scheduler_update_address(sched_ip))
                    sckt.send(msg)

            stats = ExecutorStatistics()
            for fname in call_frequency:
                fstats = stats.functions.add()
                fstats.name = fname
                fstats.call_count = call_frequency[fname]
                logging.info('Reporting %d calls for function %s.' %
                             (call_frequency[fname], fname))

                call_frequency[fname] = 0

            for dname in interarrivals:
                dstats = stats.dags.add()
                dstats.name = dname
                dstats.call_count = len(interarrivals[dname]) + 1
                dstats.interarrival.extend(interarrivals[dname])

                interarrivals[dname].clear()

            # We only attempt to send the statistics if we are running in
            # cluster mode. If we are running in local mode, we write them to
            # the local log file.
            if mgmt_ip:
                sckt = pusher_cache.get(
                    sutils.get_statistics_report_address(mgmt_ip))
                sckt.send(stats.SerializeToString())

            start = time.time()
Esempio n. 9
0
    def test_create_gpu_dag(self):
        # Create a simple two-function DAG and add it to the inbound socket.
        dag_name = 'dag'
        fn = 'fn'

        dag = create_linear_dag([None], [fn], self.kvs_client, dag_name)
        dag.functions[0].gpu = True
        self.socket.inbox.append(dag.SerializeToString())

        dags = {}
        call_frequency = {}

        address_set = {(self.ip, 1)}
        self.policy.unpinned_gpu_executors.update(address_set)

        self.pin_socket.inbox.append(sutils.ok_resp)

        create_dag(self.socket, self.pusher_cache, self.kvs_client, dags,
                   self.policy, call_frequency)

        # Test that the correct metadata was created.
        self.assertTrue(dag_name in dags)
        created, dag_source = dags[dag_name]
        self.assertEqual(created, dag)
        self.assertEqual(len(dag_source), 1)
        self.assertEqual(list(dag_source)[0], fn)
        self.assertTrue(fn in call_frequency)
        self.assertEqual(call_frequency[fn], 0)

        # Test that the DAG is stored in the KVS correctly.
        result = self.kvs_client.get(dag_name)[dag_name]
        created = Dag()
        created.ParseFromString(result.reveal())
        self.assertEqual(created, dag)

        # Test that the correct response was returned to the user.
        self.assertTrue(len(self.socket.outbox), 1)
        response = GenericResponse()
        response.ParseFromString(self.socket.outbox.pop())
        self.assertTrue(response.success)

        # Test that the correct pin messages were sent.
        self.assertEqual(len(self.pusher_cache.socket.outbox), 1)
        messages = self.pusher_cache.socket.outbox
        function_set = {fn}
        for message in messages:
            pin_msg = PinFunction()
            pin_msg.ParseFromString(message)
            self.assertEqual(pin_msg.response_address, self.ip)
            self.assertTrue(pin_msg.name in function_set)
            function_set.discard(pin_msg.name)

        self.assertEqual(len(function_set), 0)

        for address in address_set:
            self.assertTrue(
                get_pin_address(*address) in self.pusher_cache.addresses)

        # Test that the policy engine has the correct metadata stored.
        self.assertEqual(len(self.policy.unpinned_cpu_executors), 0)
        self.assertEqual(len(self.policy.pending_dags), 0)
        self.assertTrue(fn in self.policy.function_locations)

        self.assertEqual(len(self.policy.function_locations[fn]), 1)
Esempio n. 10
0
def scheduler(ip, mgmt_ip, route_addr, policy_type):

    # If the management IP is not set, we are running in local mode.
    local = (mgmt_ip is None)
    kvs = AnnaTcpClient(route_addr, ip, local=local)

    scheduler_id = str(uuid.uuid4())

    context = zmq.Context(1)
    context.set(zmq.MAX_SOCKETS, 10000)

    # A mapping from a DAG's name to its protobuf representation.
    dags = {}

    # Tracks how often a request for each function is received.
    call_frequency = {}

    # Tracks the time interval between successive requests for a particular
    # DAG.
    interarrivals = {}

    # Tracks the most recent arrival for each DAG -- used to calculate
    # interarrival times.
    last_arrivals = {}

    # Maintains a list of all other schedulers in the system, so we can
    # propagate metadata to them.
    schedulers = set()

    connect_socket = context.socket(zmq.REP)
    connect_socket.bind(sutils.BIND_ADDR_TEMPLATE % (CONNECT_PORT))

    func_create_socket = context.socket(zmq.REP)
    func_create_socket.bind(sutils.BIND_ADDR_TEMPLATE % (FUNC_CREATE_PORT))

    func_call_socket = context.socket(zmq.REP)
    func_call_socket.bind(sutils.BIND_ADDR_TEMPLATE % (FUNC_CALL_PORT))

    # This is for handle the invocation from queue
    # Mainly for storage event
    func_call_queue_socket = context.socket(zmq.PULL)
    func_call_queue_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                                (FUNC_CALL_QUEUE_PORT))

    dag_create_socket = context.socket(zmq.REP)
    dag_create_socket.bind(sutils.BIND_ADDR_TEMPLATE % (DAG_CREATE_PORT))

    dag_call_socket = context.socket(zmq.REP)
    dag_call_socket.bind(sutils.BIND_ADDR_TEMPLATE % (DAG_CALL_PORT))

    dag_delete_socket = context.socket(zmq.REP)
    dag_delete_socket.bind(sutils.BIND_ADDR_TEMPLATE % (DAG_DELETE_PORT))

    list_socket = context.socket(zmq.REP)
    list_socket.bind(sutils.BIND_ADDR_TEMPLATE % (LIST_PORT))

    exec_status_socket = context.socket(zmq.PULL)
    exec_status_socket.bind(sutils.BIND_ADDR_TEMPLATE % (sutils.STATUS_PORT))

    sched_update_socket = context.socket(zmq.PULL)
    sched_update_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                             (sutils.SCHED_UPDATE_PORT))

    pin_accept_socket = context.socket(zmq.PULL)
    pin_accept_socket.setsockopt(zmq.RCVTIMEO, 10000)  # 10 seconds.
    pin_accept_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                           (sutils.PIN_ACCEPT_PORT))

    continuation_socket = context.socket(zmq.PULL)
    continuation_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                             (sutils.CONTINUATION_PORT))

    if not local:
        management_request_socket = context.socket(zmq.REQ)
        management_request_socket.setsockopt(zmq.RCVTIMEO, 500)
        # By setting this flag, zmq matches replies with requests.
        management_request_socket.setsockopt(zmq.REQ_CORRELATE, 1)
        # Relax strict alternation between request and reply.
        # For detailed explanation, see here: http://api.zeromq.org/4-1:zmq-setsockopt
        management_request_socket.setsockopt(zmq.REQ_RELAXED, 1)
        management_request_socket.connect(
            sched_utils.get_scheduler_list_address(mgmt_ip))

    pusher_cache = SocketCache(context, zmq.PUSH)

    poller = zmq.Poller()
    poller.register(connect_socket, zmq.POLLIN)
    poller.register(func_create_socket, zmq.POLLIN)
    poller.register(func_call_socket, zmq.POLLIN)
    poller.register(func_call_queue_socket, zmq.POLLIN)
    poller.register(dag_create_socket, zmq.POLLIN)
    poller.register(dag_call_socket, zmq.POLLIN)
    poller.register(dag_delete_socket, zmq.POLLIN)
    poller.register(list_socket, zmq.POLLIN)
    poller.register(exec_status_socket, zmq.POLLIN)
    poller.register(sched_update_socket, zmq.POLLIN)
    poller.register(continuation_socket, zmq.POLLIN)

    # Start the policy engine.
    policy = DefaultCloudburstSchedulerPolicy(pin_accept_socket,
                                              pusher_cache,
                                              kvs,
                                              ip,
                                              policy_type,
                                              local=local)
    policy.update()

    start = time.time()

    while True:
        socks = dict(poller.poll(timeout=1000))

        if connect_socket in socks and socks[connect_socket] == zmq.POLLIN:
            msg = connect_socket.recv_string()
            connect_socket.send_string(route_addr)

        if (func_create_socket in socks
                and socks[func_create_socket] == zmq.POLLIN):
            create_function(func_create_socket, kvs)

        if func_call_socket in socks and socks[func_call_socket] == zmq.POLLIN:
            call_function(func_call_socket, pusher_cache, policy)

        if func_call_queue_socket in socks and socks[
                func_call_queue_socket] == zmq.POLLIN:
            call_function_from_queue(func_call_queue_socket, pusher_cache,
                                     policy)

        if (dag_create_socket in socks
                and socks[dag_create_socket] == zmq.POLLIN):
            create_dag(dag_create_socket, pusher_cache, kvs, dags, policy,
                       call_frequency)

        if dag_call_socket in socks and socks[dag_call_socket] == zmq.POLLIN:
            start_t = int(time.time() * 1000000)
            call = DagCall()
            call.ParseFromString(dag_call_socket.recv())

            name = call.name

            t = time.time()
            if name in last_arrivals:
                if name not in interarrivals:
                    interarrivals[name] = []

                interarrivals[name].append(t - last_arrivals[name])

            last_arrivals[name] = t

            if name not in dags:
                resp = GenericResponse()
                resp.success = False
                resp.error = NO_SUCH_DAG

                dag_call_socket.send(resp.SerializeToString())
                continue

            dag = dags[name]
            for fname in dag[0].functions:
                call_frequency[fname.name] += 1

            response = call_dag(call, pusher_cache, dags, policy)
            sched_t = int(time.time() * 1000000)
            logging.info(
                f'App function {name} recv: {start_t}, scheduled: {sched_t}')
            dag_call_socket.send(response.SerializeToString())

        if (dag_delete_socket in socks
                and socks[dag_delete_socket] == zmq.POLLIN):
            delete_dag(dag_delete_socket, dags, policy, call_frequency)

        if list_socket in socks and socks[list_socket] == zmq.POLLIN:
            msg = list_socket.recv_string()
            prefix = msg if msg else ''

            resp = StringSet()
            resp.keys.extend(sched_utils.get_func_list(kvs, prefix))

            list_socket.send(resp.SerializeToString())

        if exec_status_socket in socks and socks[exec_status_socket] == \
                zmq.POLLIN:
            status = ThreadStatus()
            status.ParseFromString(exec_status_socket.recv())

            policy.process_status(status)

        if sched_update_socket in socks and socks[sched_update_socket] == \
                zmq.POLLIN:
            status = SchedulerStatus()
            status.ParseFromString(sched_update_socket.recv())

            # Retrieve any DAGs that some other scheduler knows about that we
            # do not yet know about.
            for dname in status.dags:
                if dname not in dags:
                    payload = kvs.get(dname)
                    while None in payload:
                        payload = kvs.get(dname)

                    dag = Dag()
                    dag.ParseFromString(payload[dname].reveal())
                    dags[dag.name] = (dag, sched_utils.find_dag_source(dag))

                    for fname in dag.functions:
                        if fname.name not in call_frequency:
                            call_frequency[fname.name] = 0

            policy.update_function_locations(status.function_locations)

        if continuation_socket in socks and socks[continuation_socket] == \
                zmq.POLLIN:
            start_t = int(time.time() * 1000000)

            continuation = Continuation()
            continuation.ParseFromString(continuation_socket.recv())

            call = continuation.call
            call.name = continuation.name

            result = Value()
            result.ParseFromString(continuation.result)

            dag, sources = dags[call.name]
            for source in sources:
                call.function_args[source].values.extend([result])

            call_dag(call, pusher_cache, dags, policy, continuation.id)
            sched_t = int(time.time() * 1000000)
            print(
                f'App function {call.name} recv: {start_t}, scheduled: {sched_t}'
            )
            for fname in dag.functions:
                call_frequency[fname.name] += 1

        end = time.time()

        if end - start > METADATA_THRESHOLD:
            # Update the scheduler policy-related metadata.
            policy.update()

            # If the management IP is None, that means we arre running in
            # local mode, so there is no need to deal with caches and other
            # schedulers.
            if not local:
                latest_schedulers = sched_utils.get_ip_set(
                    management_request_socket, False)
                if latest_schedulers:
                    schedulers = latest_schedulers

        if end - start > REPORT_THRESHOLD:
            status = SchedulerStatus()
            for name in dags.keys():
                status.dags.append(name)

            for fname in policy.function_locations:
                for loc in policy.function_locations[fname]:
                    floc = status.function_locations.add()
                    floc.name = fname
                    floc.ip = loc[0]
                    floc.tid = loc[1]

            msg = status.SerializeToString()

            for sched_ip in schedulers:
                if sched_ip != ip:
                    sckt = pusher_cache.get(
                        sched_utils.get_scheduler_update_address(sched_ip))
                    sckt.send(msg)

            stats = ExecutorStatistics()
            for fname in call_frequency:
                fstats = stats.functions.add()
                fstats.name = fname
                fstats.call_count = call_frequency[fname]
                logging.debug('Reporting %d calls for function %s.' %
                              (call_frequency[fname], fname))

                call_frequency[fname] = 0

            for dname in interarrivals:
                dstats = stats.dags.add()
                dstats.name = dname
                dstats.call_count = len(interarrivals[dname]) + 1
                dstats.interarrival.extend(interarrivals[dname])

                interarrivals[dname].clear()

            # We only attempt to send the statistics if we are running in
            # cluster mode. If we are running in local mode, we write them to
            # the local log file.
            if mgmt_ip:
                sckt = pusher_cache.get(
                    sutils.get_statistics_report_address(mgmt_ip))
                sckt.send(stats.SerializeToString())

            start = time.time()