Beispiel #1
0
class Server:

    TIME_HEARTBEAT = 3

    '''
    @node_id: [0 .. num_nodes-1].
    @is_leader  is set to `True` when creating Server0, otherwise, to `False`.
    @is_replica is set to `True` once this server's Replica has been created.
    @num_nodes  is used to identify the broadcast range.
    @uid        is used for printing the messages.
    @current_leader is updated when receiving heartbeat from the leader.
    Every server is a replica.
    '''
    def __init__(self, node_id, is_leader, num_nodes, num_clients, is_recover):
        """Bring up one server: network, acceptor, optional leader, replica.

        Args:
            node_id: index of this server, in [0 .. num_nodes-1].
            is_leader: True when this server starts out as the leader.
            num_nodes: number of servers; passed to Network and Leader.
            num_clients: number of clients; passed to Network.
            is_recover: True when the server should first ask the other
                servers for their replica state ("requestReplicaInfo")
                and build its Replica from the answer, if one arrives
                within 2 * TIME_HEARTBEAT seconds.
        """
        self.node_id = node_id
        self.uid = "Server#" + str(node_id)
        self.num_nodes = num_nodes
        self.num_clients = num_clients

        # Every attribute the receiver thread reads gets a value before that
        # thread is started, so an early message cannot raise AttributeError
        # and silently kill the thread.
        self.count_heartbeat = 4  # greater than 3
        # Stays False until self.leader exists (same order as 'initLeader').
        self.is_leader = False
        self.leader_dead = False
        self.current_leader = -1
        self.is_replica = False
        self.waitfor_replica = False
        self.replica_all_down = False
        self.rev_heartbeat = True

        # network controller
        self.nt = Network(self.uid, num_nodes, num_clients)

        # acceptor
        # acceptor must be started first
        self.acceptor = Acceptor(self.node_id, self.nt)

        try:
            self.t_recv = Thread(target=self.receive)
            self.t_recv.start()
        except RuntimeError:
            # Thread.start() reports failure to start with RuntimeError.
            print(self.uid, "error: unable to start new thread")

        # Leaders
        if is_leader:
            time.sleep(2)  # wait for other servers to start
            self.leader = Leader(node_id, self.num_nodes, self.nt)
            self.is_leader = True
            self.leader.init_scout()
            if TERM_LOG:
                print(self.uid, "is leader")
        self.current_leader = -1  # updated when receiving leader's heartbeat
                                  # remember to update replica.leader_id and leader

        # Replicas
        if node_id < num_nodes:  # set all servers as replicas here
            # f+1  servers are replicas
            # 2f+1 (all)  servers are acceptors
            self.is_replica = False
            if is_recover:
                self.waitfor_replica = True
                self.replica_all_down = False
                self.broadcast_to_server(
                    str(("requestReplicaInfo", self.node_id)))
                give_up = threading.Timer(self.TIME_HEARTBEAT*2,
                                          self.set_replica_all_down)
                give_up.start()
                # Sleep between polls instead of spinning, so the receiver
                # thread that delivers "replicaInfo" is not starved.
                while self.waitfor_replica and not self.replica_all_down:
                    time.sleep(0.01)
                give_up.cancel()
                if not self.waitfor_replica:
                    # An answer arrived: receive() stored it before clearing
                    # waitfor_replica.
                    self.replica = Replica(node_id, self.nt,
                                           self.recv_slot_num,
                                           self.recv_decisions)
                else:
                    self.replica = Replica(node_id, self.nt)
            else:
                self.replica = Replica(node_id, self.nt)
            self.is_replica = True
        else:
            self.is_replica = False

        # leader broadcasts heartbeat
        self.rev_heartbeat = True  # whether receive heartbeat in current period
        if is_leader:
            self.broadcast_heartbeat()
        self.check_heartbeat()

        # notice Master that it starts
        self.nt.send_to_master(str(("serverStarted", self.node_id)))

    def set_replica_all_down(self):
        self.replica_all_down = True

    def broadcast_to_server(self, message):
        self.nt.broadcast_to_server(message)

    def broadcast_to_client(self, message):
        self.nt.broadcast_to_client(message)

    def send_to_server(self, dest_id, message):
        self.nt.send_to_server(dest_id, message)

    def send_to_client(self, dest_id, message):
        self.nt.send_to_client(dest_id, message)

    def receive(self):
        while 1:
            buf = self.nt.receive()
            if len(buf) > 0:
                # TODO: handle the received value
                message = list(literal_eval(buf))

                # to replica
                if (message[0] in ['request', 'decision',
                                   'requestReplicaInfo']):
                    self.replica_operation(message)
                # to leader
                if (message[0] in ['propose', 'adopted', 'preempted',
                                   'leaderAlive', 'initLeader']):
                    self.leader_operation(message)
                # to scout
                if (message[0] == "p1b" and self.is_leader):
                    self.leader.scout_operation(message)
                # to commander
                if (message[0] == "p2b" and self.is_leader):
                    self.leader.commander_operation(message)
                # to acceptor
                if (message[0] in ['p1a', 'p2a']):
                    self.acceptor_operation(message)
                # to server
                if (message[0] == "heartbeat"):
                    self.receive_heartbeat(message)
                if (message[0] == "election"):
                    self.rev_heartbeat = True
                    self.broadcast_to_server("'heartbeat', "+str(self.node_id))
                if (message[0] == "timeBombLeader"):
                    if (self.is_leader):
                        self.nt.set_remain_message(int(message[1]))
                if (message[0] == "replicaInfo" and self.waitfor_replica):
                    self.recv_slot_num = message[1]
                    self.recv_decisions = set(message[2])
                    self.waitfor_replica = False

    def replica_operation(self, message):
        if not self.is_replica:
            return
        # request from client:  ['request', (k, cid, message)]
        if (message[0] == "request"):
            self.replica.propose(message[1])
        # decision from leader: ['decision', (slot_num, proposal)]
        elif (message[0] == "decision"):
            self.replica.decide(message[1])
        # state request from server __init__: ['requestReplicaInfo', sender_id]
        elif (message[0] == "requestReplicaInfo"):
            self.send_to_server(message[1], str(("replicaInfo",
                self.replica.slot_num, list(self.replica.decisions))))

    def leader_operation(self, message):
        if not self.is_leader:
            return
        # proposal from replica: ['propose', (slot_num, proposal)]
        if message[0] == 'propose':
            self.leader.process_propose(message)
        # adoption from scout: ['adopted', ballot_num, pvalue]
        elif message[0] == 'adopted':
            self.leader.process_adopted(message)
        # preemption from commander: ['preempted', ballot]
        elif message[0] == 'preempted':
            self.leader.process_preempted(message)
        elif message[0] == 'leaderAlive':
            self.nt.send_to_master(str(("leaderAlive", self.node_id)))
        elif message[0] == 'initLeader':
            # Leaders
            self.count_heartbeat = 4 # greater than 3
            self.is_leader = False
            self.leader_dead = False
            self.leader = Leader(self.node_id, self.num_nodes, self.nt)
            self.is_leader = True
            self.leader.init_scout()
            if TERM_LOG:
                print(self.uid, "resets leader info")
            self.current_leader = -1

    def acceptor_operation(self, message):
        # request from scout: ['p1a', (sender_id, scout_id), ballot_num]
        if message[0] == 'p1a':
            self.acceptor.process_p1a(message)
        # request from commander:
        # ['p2a', (sender_id, commander_id), (ballot_num, slot_num, proposal)]
        elif message[0] == 'p2a':
            self.acceptor.process_p2a(message)

    def broadcast_heartbeat(self):
        if self.is_leader or \
           self.current_leader == self.node_id: # others may be elected
            self.broadcast_to_server(str(("heartbeat", self.node_id)))
        threading.Timer(self.TIME_HEARTBEAT,
                        self.broadcast_heartbeat).start()

    def receive_heartbeat(self, message):
        # heartbeat from leader: ['heartbeat', leader_id]
        candidate = int(message[1])
        if candidate == self.current_leader:
            # DB: print(self.uid, "set rev_heartbeat", "true", time.strftime("%M:%S", time.gmtime()))
            self.rev_heartbeat = True
            self.leader_dead = False
        else:
            if self.current_leader >= 0:
                if candidate < self.current_leader:
                    self.rev_heartbeat = True
                    self.current_leader = candidate
                    if candidate == self.node_id:
                        self.count_heartbeat = 0
                    else:
                        self.is_leader = False
                    if self.is_replica:
                        self.replica.set_leader(candidate)
                    if TERM_LOG:
                        print(self.uid, " updates Server#", candidate,
                              " as leader candidate", sep="")
            else:
                self.rev_heartbeat = True
                #self.leader_dead = True
                self.current_leader = candidate
                if candidate == self.node_id:
                    self.count_heartbeat = 0
                else:
                    self.is_leader = False
                if self.is_replica:
                    self.replica.set_leader(candidate)
                if TERM_LOG:
                    print(self.uid, " updates Server#", candidate,
                          " as leader candidate", sep="")

        if self.current_leader == self.node_id:
            self.count_heartbeat = self.count_heartbeat + 1
            if (self.count_heartbeat == 3):
                self.nt.set_remain_message()
                self.is_leader = True
                self.leader = Leader(self.node_id, self.num_nodes, self.nt)
                self.leader.init_scout()
                self.broadcast_to_client(str(("leaderElected", self.node_id)))
                self.nt.send_to_master(str(("leaderElected", self.node_id)))


    '''
    Starts leader election whenever the leader's heartbeat
    times out.
    '''
    def check_heartbeat(self):
        if (not self.is_leader) and (not self.rev_heartbeat):
            # TODO: leader election
            if TERM_LOG:
                print(self.uid, " starts election Server#",
                      self.node_id, sep="")
            self.current_leader = self.node_id
            self.count_heartbeat = 0
            if self.is_replica:
                self.replica.set_leader(self.node_id)
            self.broadcast_heartbeat()
            #self.broadcast_to_server(str(("heartbeat", self.node_id)))
        self.rev_heartbeat = False
        threading.Timer(self.TIME_HEARTBEAT+1, self.check_heartbeat).start()
Beispiel #2
0
class Client:

    REQUEST_TIME = 3
    TIME_ALLCLEAR = 2

    """
    @uid is used for printing the messages.
    """

    def __init__(self, client_id, num_nodes):
        """Create a chat client and start its receiver thread.

        Args:
            client_id: index of this client; used in `uid` and in every
                request triple the client sends.
            num_nodes: number of servers; passed to Network.
        """
        self.client_id = client_id
        self.command_id = 0
        self.num_nodes = num_nodes
        self.uid = "Client#" + str(client_id)
        self.chatlog = []
        self.queue = []  # all waiting messages
        self.history = set()  # all received messages
        self.leader_id = 0  # default leader is server#0
        self.counter = 0  # periodically resend messages

        # network controller
        self.nt = Network(self.uid, self.num_nodes)
        try:
            self.t_recv = Thread(target=self.receive)
            self.t_recv.start()
        except RuntimeError:
            # Thread.start() reports failure to start with RuntimeError;
            # anything else (e.g. KeyboardInterrupt) is left to propagate.
            print(self.uid, "error: unable to start new thread")

        # send requests periodically
        # self.period_request()

    def send(self, message):
        self.nt.broadcast_to_server(message)

    def send_request(self, triple):
        encode = str(("request", triple))
        # broadcast to all replicas or servers
        self.send(encode)

    def period_request(self):
        for tp in self.queue:
            self.send_request(tp)
        # threading.Timer(self.REQUEST_TIME, self.period_request).start()

    def monitor_queue(self):
        while self.queue:
            self.counter = self.counter + 1
            if self.counter == 5:
                if TERM_LOG:
                    print(self.uid, "sends initLeader")
                self.nt.broadcast_to_server(str(("initLeader", self.client_id)))  # reset current leader
            if self.counter >= 15:
                if TERM_LOG:
                    print(self.uid, "allClear timeout")
                self.counter = 0
                break
            time.sleep(self.TIME_ALLCLEAR)
        self.nt.send_to_master(str(("allCleared", self.client_id)))

    def receive(self):
        """Receiver-thread loop: read messages from the network and react.

        Each non-empty buffer is parsed with ``literal_eval``; the first
        element names the message type:
          sendMessage   -- queue a new request triple and broadcast it
          allClear      -- start monitor_queue on a timer thread
          printChatLog  -- send the formatted chat log to the master
          response      -- (response, client_id, cid, (index, chat)):
                           drop the matching queued request and log the
                           decision once
          leaderElected -- re-send every queued request
        Buffers that do not parse into a non-empty tuple/list are reported
        and dropped so one bad message does not terminate the thread.
        This method never returns.
        """
        while 1:
            buf = self.nt.receive()
            if len(buf) == 0:
                continue
            # handle the received value
            if TERM_LOG:
                print(self.uid, "handles", buf)
            try:
                buf = literal_eval(buf)
            except (ValueError, SyntaxError):
                print(self.uid, "error: drops malformed message")
                continue
            if not isinstance(buf, (tuple, list)) or not buf:
                continue
            kind = buf[0]

            # send request to server (leader)
            # should we send requests to all replicas?
            if kind == "sendMessage":
                self.command_id = self.command_id + 1
                triple = (self.client_id, self.command_id, buf[1])
                self.queue.append(triple)
                self.send_request(triple)

            # clear the message queue
            elif kind == "allClear":
                self.counter = 0
                threading.Timer(0, self.monitor_queue).start()

            # print chat log
            elif kind == "printChatLog":
                if TERM_LOG:
                    print(self.uid, "prints chat log")
                lines = []
                for entry in self.chatlog:
                    # entry is (client_id, slot_num, result)
                    # TODO: may need index_number instead of slot_num
                    if TERM_LOG:
                        print(self.uid, ">>", entry[1] - 1, entry[0], entry[2])
                    lines.append("{0} {1}: {2}".format(entry[1] - 1,
                                                       entry[0], entry[2]))
                log = "\n".join(lines)
                self.nt.send_to_master(str(("chatLog", log.strip())))

            # receive response from leader, send ask to master
            # format: (response, client_id, cid, (index, chat))
            elif kind == "response":
                if buf[1] == self.client_id:
                    # remove the answered request from self.queue, if queued
                    answered = next(
                        (x for x in self.queue if x[1] == buf[2]), None)
                    if answered is not None:
                        self.queue.remove(answered)
                if buf not in self.history:
                    # add decision into self.chatlog
                    # log format: (client_id, slot_num, result)
                    self.chatlog.append((buf[1], buf[3][0], buf[3][1]))
                    self.history.add(buf)
                    if TERM_LOG:
                        print(self.uid, " logs <", buf[3], ">", sep="")

            elif kind == "leaderElected":
                self.period_request()
Beispiel #3
0
            """ Print out the client specified by client_index's chat history
                in the format described on the handout """
            waitfor_chatlog = True
            nt.send_to_client(client_index, str(("printChatLog", 0)))
            # ensure the log has been printed
            while waitfor_chatlog:
                pass

        if line[0] == 'allClear':
            """ Ensure that this blocks until all messages that are going to
                come to consensus in PAXOS do, and that all clients have heard
                of them """
            waitfor_clear = set(range(num_clients))
            waitfor_leader_resp = True
            nt.broadcast_to_client(str(("allClear", 0)))
            nt.broadcast_to_server(str(("leaderAlive", 0)))
            while waitfor_clear or waitfor_leader_resp:
                if TERM_LOG:
                    time.sleep(CLEAR_TIME)
                    print(uid, "waits for allClear")
                pass

        if line[0] == 'crashServer':
            node_index = int(line[1])
            """ Immediately crash the server specified by node_index """
            if node_index in range(num_nodes):
                if nodes[node_index] != None:
                    os.kill(nodes[node_index], signal.SIGKILL)
                    nodes[node_index] = None
                else:
                    if TERM_LOG: