Example #1
0
    def __process_queue(self):
        '''
        Makes one pass over the message queue, sending every message whose
        source and destination are not separated by any partition.

        Messages that some partition still blocks are put back on the queue.
        The queue lock is held for the whole pass and is released even if an
        exception is raised part-way through.
        '''
        with self.msg_queue_lock:
            # The number of iterations is fixed up front: messages are taken
            # from the right and blocked ones are re-added on the left, so
            # each message present at the start is examined exactly once.
            for _ in range(len(self.msg_queue)):
                msg = self.msg_queue.pop()
                assert isinstance(msg, CustomMessage)

                src_node = self.nodes[msg.source]

                if msg.destination not in self.nodes:
                    log.warning("Message with unknown destination {}".format(msg.destination))
                    dst_node = None
                else:
                    dst_node = self.nodes[msg.destination]

                # One blocking partition is enough to hold the message back.
                blocked = any(p.are_partitioned(src_node, dst_node)
                              for p in self.partitions.values())

                if blocked:
                    self.msg_queue.appendleft(msg)
                else:
                    self.backend.send_message(msg.destination, msg)
Example #2
0
    def __process_queue(self):
        '''
        Walks the message queue once and hands to the backend every message
        that no partition currently blocks.

        A message whose source and destination are reported as partitioned
        by at least one partition is returned to the queue instead. The
        queue lock is taken for the duration of the pass and is always
        released, including when an exception escapes the loop.
        '''
        with self.msg_queue_lock:
            # Only the messages queued at the start of the pass are visited:
            # pop() takes from the right, re-queued messages go on the left.
            for _ in range(len(self.msg_queue)):
                msg = self.msg_queue.pop()
                assert isinstance(msg, CustomMessage)

                src_node = self.nodes[msg.source]

                if msg.destination not in self.nodes:
                    log.warning("Message with unknown destination {}".format(
                        msg.destination))
                    dst_node = None
                else:
                    dst_node = self.nodes[msg.destination]

                # Stop checking as soon as one partition blocks the message.
                blocked = any(
                    p.are_partitioned(src_node, dst_node)
                    for p in self.partitions.values())

                if blocked:
                    self.msg_queue.appendleft(msg)
                else:
                    self.backend.send_message(msg.destination, msg)
Example #3
0
 def wait_for_state(self, state):
     '''
     Blocks until the node is in ``state``, or in Node.STATE_FAILED or
     Node.STATE_CRASHED, whichever is observed first.

     Waiting is done on ``self.cv``; it is released on every exit path,
     including when ``wait()`` raises. If the wait ended in a state other
     than the requested one, a warning is logged.
     '''
     with self.cv:
         while self.state not in (state, Node.STATE_FAILED, Node.STATE_CRASHED):
             self.cv.wait()
         # Snapshot while still holding the condition so that the check and
         # the log message below both describe the state that ended the wait.
         reached = self.state

     if reached != state:
         log.warning("Node entered {} state while waiting for state {}".format( Node.state_str[reached], Node.state_str[state]))
Example #4
0
    def wait_for_state(self, state):
        '''
        Waits on ``self.cv`` until the node's state is ``state``,
        Node.STATE_FAILED or Node.STATE_CRASHED.

        The condition is released on every exit path, including when
        ``wait()`` raises. A warning is logged when the wait finished in a
        state other than the one requested.
        '''
        with self.cv:
            while self.state not in (state, Node.STATE_FAILED,
                                     Node.STATE_CRASHED):
                self.cv.wait()
            # Capture the state under the condition so the comparison and the
            # logged value cannot diverge if the state changes afterwards.
            reached = self.state

        if reached != state:
            log.warning(
                "Node entered {} state while waiting for state {}".format(
                    Node.state_str[reached], Node.state_str[state]))
Example #5
0
 def hello_sender(tries_left):
     '''
     Sends the hello message while the node is absent from self.node_zid,
     rescheduling itself until ``tries_left`` runs out; at zero tries the
     node is marked as failed.
     '''
     if node_id not in self.node_zid:
         self.__send_zmq_message(zmq_msg)
         tries_left -= 1
         if tries_left > 0:
             # Run again at loop.time() + 1 with the reduced try count.
             retry_at = self.loop.time() + 1
             self.loop.add_timeout(retry_at, hello_sender, tries_left=tries_left)

     if tries_left != 0:
         return

     # No tries remain: report the node and flag it as failed.
     log.warning("Node %s did not respond to hello message" % node_id)
     self.ds.nodes[node_id].set_state(Node.STATE_FAILED)
Example #6
0
    def recover_node(self, node_id, deliver):
        '''
        Moves a node from Node.STATE_PARTITIONED back to Node.STATE_RUNNING
        by removing the partition created for it when it was failed.

        ``deliver`` is passed through to remove_partition().

        Raises ChistributedException if the node is unknown or is not in
        the partitioned state.
        '''
        log.warning("recovering {}".format(node_id))
        if node_id not in self.nodes:
            raise ChistributedException("No such node: {}".format(node_id))

        node = self.nodes[node_id]
        if node.state != Node.STATE_PARTITIONED:
            raise ChistributedException("Node {} is not in a failed state".format(node_id))

        partition_name = self.__failed_node_partition_name(node_id)
        self.remove_partition(partition_name, deliver)

        node.set_state(Node.STATE_RUNNING)
Example #7
0
            def hello_sender(tries_left):
                '''
                Sends the hello message while the node is absent from
                self.node_zid and reschedules itself until ``tries_left``
                is used up; at zero tries the node is marked as failed.
                '''
                if node_id not in self.node_zid:
                    self.__send_zmq_message(zmq_msg)
                    tries_left -= 1
                    if tries_left > 0:
                        # Run again at loop.time() + 1 with the reduced
                        # try count.
                        retry_at = self.loop.time() + 1
                        self.loop.add_timeout(retry_at,
                                              hello_sender,
                                              tries_left=tries_left)

                if tries_left != 0:
                    return

                # No tries remain: report the node and flag it as failed.
                log.warning("Node %s did not respond to hello message" %
                            node_id)
                self.ds.nodes[node_id].set_state(Node.STATE_FAILED)
Example #8
0
 def remove_partition(self, name, deliver):
     '''
     Removes the partition called ``name``.

     Before the partition is deleted, the queued messages are run through
     __process_partitioned_messages() for that partition, with ``deliver``
     passed through unchanged.

     Raises ChistributedException if no partition has that name.
     '''
     if name not in self.partitions:
         raise ChistributedException("No such partition: %s" % name)

     # Hold the queue lock to prevent messages being added to the message
     # queue while we remove the partition. The lock is released even if
     # processing the queued messages raises.
     with self.msg_queue_lock:
         self.__process_partitioned_messages(self.partitions[name], deliver)
         del self.partitions[name]

     log.warning("Removing partition {}".format(name))
Example #9
0
    def fail_node(self, node_id):
        '''
        Puts a running node into Node.STATE_PARTITIONED by adding a
        partition whose first side contains only that node.

        Raises ChistributedException if the node is unknown, is already
        partitioned, or is not currently running.
        '''
        log.warning("failing {}".format(node_id))
        if node_id not in self.nodes:
            raise ChistributedException("No such node: {}".format(node_id))

        node = self.nodes[node_id]
        current = node.state

        if current == Node.STATE_PARTITIONED:
            raise ChistributedException("Node {} is already in a failed state".format(node_id))
        if current != Node.STATE_RUNNING:
            raise ChistributedException("Node {} cannot be failed because it is not running".format(node_id))

        partition_name = self.__failed_node_partition_name(node_id)
        self.add_partition(partition_name, [node_id])

        node.set_state(Node.STATE_PARTITIONED)
Example #10
0
    def stop_node(self, node_id):
        '''
        Sends SIGTERM to the named node (via terminate() on its process
        handle) and forgets the handle.

        Node implementations are expected to catch the signal and shut
        down on their own. If the process has already exited, its return
        code is collected and a warning is logged instead.
        '''
        log.info("Stopping node " + node_id)

        proc = self.node_pids[node_id]
        exit_code = proc.poll()
        if exit_code is None:
            # Still running: ask it to terminate.
            proc.terminate()
        else:
            exit_code = proc.wait()
            log.warning("Node {} had already exited (rc = {})".format(node_id, exit_code))

        del self.node_pids[node_id]
Example #11
0
 def hello_sender(tries_left):
     '''
     Sends the hello message while the node is absent from self.node_zid,
     rescheduling itself until ``tries_left`` runs out. At zero tries the
     node is marked as crashed or failed depending on its process handle.
     '''
     if node_id not in self.node_zid:
         self.__send_zmq_message(zmq_msg)
         tries_left -= 1
         if tries_left > 0:
             # Run again at loop.time() + 1 with the reduced try count.
             retry_at = self.loop.time() + 1
             self.loop.add_timeout(retry_at, hello_sender, tries_left=tries_left)

     if tries_left != 0:
         return

     log.warning("Node %s did not respond to hello message" % node_id)

     # Check whether the node has died: a non-None poll() result is
     # treated as the node process having exited.
     exit_code = self.node_pids[node_id].poll()
     if exit_code is None:
         self.ds.nodes[node_id].set_state(Node.STATE_FAILED)
         return

     self.ds.nodes[node_id].set_state(Node.STATE_CRASHED)
     log.warning("Node %s has crashed (rc = %i)" % (node_id, exit_code))
     self.loop.stop()
Example #12
0
    def stop_node(self, node_id):
        '''
        Sends SIGTERM to the named node (via terminate() on its process
        handle) and drops the handle from self.node_pids.

        Node implementations are expected to catch the signal and shut
        down themselves. When the process has already exited, its return
        code is collected and reported in a warning instead.
        '''
        log.info("Stopping node " + node_id)

        proc = self.node_pids[node_id]
        exit_code = proc.poll()
        if exit_code is None:
            # Still running: ask it to terminate.
            proc.terminate()
        else:
            exit_code = proc.wait()
            log.warning("Node {} had already exited (rc = {})".format(
                node_id, exit_code))

        del self.node_pids[node_id]
Example #13
0
    def add_partition(self, name, nodes1, nodes2 = None):
        '''
        Registers a new partition called ``name`` between two groups of
        nodes.

        ``nodes1`` and ``nodes2`` hold node identifiers (keys of
        self.nodes). When ``nodes2`` is None, the second group is every
        known node that is not in ``nodes1``.

        Raises ChistributedException if the name is already taken or if
        any listed node is unknown.
        '''
        if name in self.partitions:
            raise ChistributedException("A partition named '%s' already exists" % name)

        for node_id in nodes1:
            if node_id not in self.nodes:
                raise ChistributedException("No such node: %s" % node_id)

        if nodes2 is not None:
            for node_id in nodes2:
                if node_id not in self.nodes:
                    raise ChistributedException("No such node: %s" % node_id)
        else:
            # Default second side: all remaining known nodes.
            nodes2 = [node_id for node_id in self.nodes if node_id not in nodes1]

        first_side = [self.nodes[node_id] for node_id in nodes1]
        second_side = [self.nodes[node_id] for node_id in nodes2]
        self.partitions[name] = Partition(name, first_side, second_side)

        log.warning("Creating partition {}".format(name))
Example #14
0
    def __process_partitioned_messages(self, p, deliver):
        '''
        Makes one pass over the message queue on behalf of partition ``p``.

        Messages whose source and destination ``p`` reports as partitioned
        are removed from the queue; they are sent through the backend when
        ``deliver`` is true and discarded otherwise. All other messages are
        put back on the queue.
        '''
        # Visit only the messages queued at the start: pop() takes from the
        # right while retained messages are re-added on the left.
        for _ in range(len(self.msg_queue)):
            msg = self.msg_queue.pop()
            assert isinstance(msg, CustomMessage)

            src_node = self.nodes[msg.source]

            if msg.destination in self.nodes:
                dst_node = self.nodes[msg.destination]
            else:
                log.warning("Message with unknown destination {}".format(msg.destination))
                dst_node = None

            if not p.are_partitioned(src_node, dst_node):
                # Not affected by this partition: keep it queued.
                self.msg_queue.appendleft(msg)
                continue

            if deliver:
                self.backend.send_message(msg.destination, msg)
Example #15
0
            def hello_sender(tries_left):
                '''
                Sends the hello message while the node is absent from
                self.node_zid, rescheduling itself until ``tries_left``
                runs out. At zero tries the node is marked as crashed or
                failed depending on its process handle.
                '''
                if node_id not in self.node_zid:
                    self.__send_zmq_message(zmq_msg)
                    tries_left -= 1
                    if tries_left > 0:
                        # Run again at loop.time() + 1 with the reduced
                        # try count.
                        retry_at = self.loop.time() + 1
                        self.loop.add_timeout(retry_at,
                                              hello_sender,
                                              tries_left=tries_left)

                if tries_left != 0:
                    return

                log.warning("Node %s did not respond to hello message" %
                            node_id)

                # Check whether the node has died: a non-None poll() result
                # is treated as the node process having exited.
                exit_code = self.node_pids[node_id].poll()
                if exit_code is None:
                    self.ds.nodes[node_id].set_state(Node.STATE_FAILED)
                    return

                self.ds.nodes[node_id].set_state(Node.STATE_CRASHED)
                log.warning("Node %s has crashed (rc = %i)" %
                            (node_id, exit_code))
                self.loop.stop()
Example #16
0
    def __process_partitioned_messages(self, p, deliver):
        '''
        Walks the message queue once for partition ``p``.

        A message whose source and destination ``p`` reports as
        partitioned leaves the queue: it is sent through the backend when
        ``deliver`` is true and dropped otherwise. Every other message is
        returned to the queue.
        '''
        # Only messages present at the start are visited: pop() takes from
        # the right while retained messages are re-added on the left.
        for _ in range(len(self.msg_queue)):
            msg = self.msg_queue.pop()
            assert isinstance(msg, CustomMessage)

            src_node = self.nodes[msg.source]

            if msg.destination in self.nodes:
                dst_node = self.nodes[msg.destination]
            else:
                log.warning("Message with unknown destination {}".format(
                    msg.destination))
                dst_node = None

            if not p.are_partitioned(src_node, dst_node):
                # Not affected by this partition: keep it queued.
                self.msg_queue.appendleft(msg)
                continue

            if deliver:
                self.backend.send_message(msg.destination, msg)