예제 #1
0
def emulate(args):
    '''
    Run a Sender/Receiver pair for a fixed time and collect their logs.

    Starts the Sender/Receiver process threads, sleeps for
    args.RUNTIME seconds (or until a KeyboardInterrupt arrives), and
    terminates both threads.

    args must provide SENDER_TIMEOUT and RUNTIME. Both are converted
    to float and written back onto args; both must be strictly
    positive, otherwise an AssertionError is raised before any thread
    is created.

    Returns a tuple of lists: (s_log, r_log), where s_log is sender's
    log and r_log is receiver's log.
    '''
    args.SENDER_TIMEOUT = float(args.SENDER_TIMEOUT)
    args.RUNTIME = float(args.RUNTIME)

    assert args.SENDER_TIMEOUT > 0, "SENDER_TIMEOUT must be positive"
    assert args.RUNTIME > 0, "RUNTIME must be positive"

    s = Sender(args.SENDER_TIMEOUT)
    r = Receiver()
    # Each endpoint needs a handle on its peer before it starts running.
    s.set_remote_endpoint(r)
    r.set_remote_endpoint(s)

    r.daemon = True
    s.daemon = True

    # Start both endpoints, sender first.
    s.start()
    r.start()

    try:
        time.sleep(args.RUNTIME)
    except KeyboardInterrupt:
        # Parenthesized form prints the same text on Python 2 and 3.
        print("Interrupted, terminating.")

    # We have to be careful with terminating the two threads, as they
    # can only exit in specific states, and we can cause a deadlock.
    # First, we terminate the sender, and wait for it to finish. Once
    # this happens, we know that the receiver is in an ok terminal
    # state, so we terminate it right after.
    s.terminate()
    s.join()

    r.terminate()
    r.join()

    # At this point, the sender is not generating any more
    # messages. But, we might have some outstanding messages in
    # receiver's queue. So, process these, if any:
    while not r.rx_queue.empty():
        # Receive msg and generate any outstanding acks.
        r.transition()

    # NOTE(review): two further transitions are run after the queue is
    # drained; presumably they let the receiver finish its last
    # receive/ack cycle -- confirm against Receiver.transition().
    r.transition()
    r.transition()

    # Consume any outstanding acks on the sender's side.
    s.consume_acks()

    return (s.log, r.log)
예제 #2
0
def emulate(args):
    '''
    Run a Sender/Receiver pair for a fixed time and collect their logs.

    Starts the Sender/Receiver process threads, sleeps for
    args.RUNTIME seconds (or until a KeyboardInterrupt arrives), and
    terminates both threads.

    args must provide SENDER_TIMEOUT and RUNTIME. Both are converted
    to float and written back onto args; both must be strictly
    positive, otherwise an AssertionError is raised before any thread
    is created.

    Returns a tuple of lists: (s_log, r_log), where s_log is sender's
    log and r_log is receiver's log.
    '''
    args.SENDER_TIMEOUT = float(args.SENDER_TIMEOUT)
    args.RUNTIME = float(args.RUNTIME)

    assert args.SENDER_TIMEOUT > 0, "SENDER_TIMEOUT must be positive"
    assert args.RUNTIME > 0, "RUNTIME must be positive"

    s = Sender(args.SENDER_TIMEOUT)
    r = Receiver()
    # Wire the two endpoints to each other before either one runs.
    s.set_remote_endpoint(r)
    r.set_remote_endpoint(s)

    r.daemon = True
    s.daemon = True

    # Start both endpoints, sender first.
    s.start()
    r.start()

    try:
        time.sleep(args.RUNTIME)
    except KeyboardInterrupt:
        # Parenthesized form prints the same text on Python 2 and 3.
        print("Interrupted, terminating.")

    # We have to be careful with terminating the two threads, as they
    # can only exit in specific states, and we can cause a deadlock.
    # First, we terminate the sender, and wait for it to finish. Once
    # this happens, we know that the receiver is in an ok terminal
    # state, so we terminate it right after.
    s.terminate()
    s.join()

    r.terminate()
    r.join()

    # At this point, the sender is not generating any more
    # messages. But, we might have some outstanding messages in
    # receiver's queue. So, process these, if any:
    while not r.rx_queue.empty():
        # Receive msg and generate any outstanding acks.
        r.transition()

    # NOTE(review): two further transitions are run after the queue is
    # drained; presumably they let the receiver finish its last
    # receive/ack cycle -- confirm against Receiver.transition().
    r.transition()
    r.transition()

    # Consume any outstanding acks on the sender's side.
    s.consume_acks()

    return (s.log, r.log)
예제 #3
0
    def run(self):
        """Accept clients until ``self.done`` is set, then close the socket.

        A fresh ``Event`` is stored on ``self.done`` when the loop starts.
        Every accepted connection is handed to a new ``Receiver`` that is
        flagged as a daemon and started. The server socket is always
        closed on the way out, including when ``accept()`` or the
        ``Receiver`` setup raises; such an exception still propagates to
        the caller.
        """
        self.done = Event()
        try:
            # NOTE(review): the flag is only re-checked between accept()
            # calls, so with a blocking socket a stop request presumably
            # takes effect only after the next client connects -- confirm.
            while not self.done.is_set():
                print('Waiting for clients')
                client_sock, client_info = self.server_sock.accept()
                r = Receiver(self, client_sock, client_info)
                r.daemon = True
                r.start()
        finally:
            # Release the listening socket on every exit path, not only
            # when the loop ends normally.
            self.server_sock.close()
            print('The server socket has been closed')
예제 #4
0
    def run(self):
        """Run the replication broker loop.

        Setup:
          * reads ``listener_port`` and ``network_interface`` from the
            config of the 'replication' Service; a missing
            NetworkConnection falls back to listening on 0.0.0.0, any
            other failure is logged and the method returns;
          * reads the uuid of the current Appliance (logged + return on
            failure);
          * binds a ROUTER ``frontend`` on tcp://interface:port and a
            ROUTER ``backend`` on the configured ipc socket.

        Loop (one poll per iteration, frontend takes precedence):
          * frontend message: 'sender-ready' starts a Receiver for that
            address unless a live one is already registered, in which
            case 'receiver-init-error' is sent back; any other command
            is forwarded to the backend;
          * backend message: 'new-send' starts a Sender for the replica
            id carried in the message and replies SUCCESS or ERROR;
            otherwise, if the address is a known remote sender, the
            message is forwarded to the frontend (after an ACK to the
            backend for receiver-ready / receiver-error /
            btrfs-recv-finished);
          * idle poll: every 10th idle poll prunes senders and
            receivers, prunes trails at most once per hour, and leaves
            the loop if the parent pid has changed.
        """
        self.law = APIWrapper()

        try:
            so = Service.objects.get(name='replication')
            config_d = json.loads(so.config)
            self.listener_port = int(config_d['listener_port'])
            nco = NetworkConnection.objects.get(
                name=config_d['network_interface'])
            self.listener_interface = nco.ipaddr
        except NetworkConnection.DoesNotExist:
            # Configured interface is unknown: listen on all interfaces.
            self.listener_interface = '0.0.0.0'
        except Exception as e:
            msg = ('Failed to fetch network interface for Listner/Broker. '
                   'Exception: %s' % e.__str__())
            return logger.error(msg)

        try:
            self.uuid = Appliance.objects.get(current_appliance=True).uuid
        except Exception as e:
            msg = ('Failed to get uuid of current appliance. Aborting. '
                   'Exception: %s' % e.__str__())
            return logger.error(msg)

        ctx = zmq.Context()
        frontend = ctx.socket(zmq.ROUTER)
        frontend.set_hwm(10)
        frontend.bind('tcp://%s:%d'
                      % (self.listener_interface, self.listener_port))

        backend = ctx.socket(zmq.ROUTER)
        backend.bind('ipc://%s' % settings.REPLICATION.get('ipc_socket'))

        poller = zmq.Poller()
        poller.register(frontend, zmq.POLLIN)
        poller.register(backend, zmq.POLLIN)
        self.local_receivers = {}

        # Housekeeping runs once every `iterations` idle polls.
        iterations = 10
        poll_interval = 6000  # 6 seconds
        msg_count = 0
        while True:
            # This loop may still continue even if replication service
            # is terminated, as long as data is coming in.
            socks = dict(poller.poll(timeout=poll_interval))
            if (frontend in socks and socks[frontend] == zmq.POLLIN):
                address, command, msg = frontend.recv_multipart()
                # Per-address count of messages seen on the frontend.
                if (address not in self.remote_senders):
                    self.remote_senders[address] = 1
                else:
                    self.remote_senders[address] += 1
                msg_count += 1
                # Dump the per-address counters every 1000 messages.
                if (msg_count == 1000):
                    msg_count = 0
                    for rs, count in self.remote_senders.items():
                        logger.debug('Active Receiver: %s. Messages processed:'
                                     '%d' % (rs, count))
                if (command == 'sender-ready'):
                    logger.debug('initial greeting from %s' % address)
                    # Start a new receiver and send the appropriate response
                    try:
                        start_nr = True
                        if (address in self.local_receivers):
                            start_nr = False
                            ecode = self.local_receivers[address].exitcode
                            if (ecode is not None):
                                # The registered receiver has already
                                # exited; drop it and start a fresh one.
                                del self.local_receivers[address]
                                logger.debug('Receiver(%s) exited. exitcode: '
                                             '%s. Forcing removal from broker '
                                             'list.' % (address, ecode))
                                start_nr = True
                            else:
                                msg = ('Receiver(%s) already exists. '
                                       'Will not start a new one.' %
                                       address)
                                logger.error(msg)
                                # @todo: There may be a different way to handle
                                # this. For example, we can pass the message to
                                # the active receiver and factor into it's
                                # retry/robust logic. But that is for later.
                                frontend.send_multipart(
                                    [address, 'receiver-init-error', msg])
                        if (start_nr):
                            nr = Receiver(address, msg)
                            nr.daemon = True
                            nr.start()
                            logger.debug('New Receiver(%s) started.' % address)
                            self.local_receivers[address] = nr
                        continue
                    except Exception as e:
                        msg = ('Exception while starting the '
                               'new receiver for %s: %s'
                               % (address, e.__str__()))
                        logger.error(msg)
                        frontend.send_multipart(
                            [address, 'receiver-init-error', msg])
                else:
                    # do we hit hwm? is the dealer still connected?
                    backend.send_multipart([address, command, msg])

            elif (backend in socks and socks[backend] == zmq.POLLIN):
                address, command, msg = backend.recv_multipart()
                if (command == 'new-send'):
                    rid = int(msg)
                    logger.debug('new-send request received for %d' % rid)
                    # Stays 'ERROR' unless the sender is actually started.
                    rcommand = 'ERROR'
                    try:
                        replica = Replica.objects.get(id=rid)
                        if (replica.enabled):
                            self._process_send(replica)
                            msg = ('A new Sender started successfully for '
                                   'Replication Task(%d).' % rid)
                            rcommand = 'SUCCESS'
                        else:
                            msg = ('Failed to start a new Sender for '
                                   'Replication '
                                   'Task(%d) because it is disabled.' % rid)
                    except Exception as e:
                        msg = ('Failed to start a new Sender for Replication '
                               'Task(%d). Exception: %s' % (rid, e.__str__()))
                        logger.error(msg)
                    finally:
                        # The requester always gets exactly one reply.
                        backend.send_multipart([address, rcommand, str(msg)])
                elif (address in self.remote_senders):
                    if (command in ('receiver-ready', 'receiver-error', 'btrfs-recv-finished')):  # noqa E501
                        logger.debug('Identitiy: %s command: %s'
                                     % (address, command))
                        backend.send_multipart([address, b'ACK', ''])
                        # a new receiver has started. reply to the sender that
                        # must be waiting
                    frontend.send_multipart([address, command, msg])

            else:
                # Neither socket had data within poll_interval.
                iterations -= 1
                if (iterations == 0):
                    iterations = 10
                    self._prune_senders()
                    self._delete_receivers()
                    cur_time = time.time()
                    if (self.trail_prune_time is None or
                            (cur_time - self.trail_prune_time) > 3600):
                        # prune send/receive trails every hour or so.
                        self.trail_prune_time = cur_time
                        # Explicit loops rather than map(): map() is lazy
                        # on Python 3, so calling it only for its side
                        # effects would silently skip the pruning there.
                        for ro in Replica.objects.filter():
                            self.prune_replica_trail(ro)
                        for rso in ReplicaShare.objects.filter():
                            self.prune_receive_trail(rso)
                        logger.debug('Replica trails are truncated '
                                     'successfully.')

                    # A changed parent pid means the process that spawned
                    # this broker is gone.
                    if (os.getppid() != self.ppid):
                        logger.error('Parent exited. Aborting.')
                        ctx.destroy()
                        # do some cleanup of senders before quitting?
                        break
예제 #5
0
    def run(self):
        """Run the replication broker loop.

        Setup:
          * reads ``listener_port`` and ``network_interface`` from the
            config of the "replication" Service; a missing
            NetworkConnection falls back to listening on 0.0.0.0, any
            other failure is logged and the method returns;
          * reads the uuid of the current Appliance (logged + return on
            failure);
          * binds a ROUTER ``frontend`` on tcp://interface:port and a
            ROUTER ``backend`` on the configured ipc socket.

        Loop (one poll per iteration, frontend takes precedence):
          * frontend message: "sender-ready" starts a Receiver for that
            address unless a live one is already registered, in which
            case "receiver-init-error" is sent back; any other command
            is forwarded to the backend;
          * backend message: "new-send" starts a Sender for the replica
            id carried in the message and replies SUCCESS or ERROR;
            otherwise, if the address is a known remote sender, the
            message is forwarded to the frontend (after an ACK to the
            backend for receiver-ready / receiver-error /
            btrfs-recv-finished);
          * idle poll: every 10th idle poll prunes senders and
            receivers, prunes trails at most once per hour, and leaves
            the loop if the parent pid has changed.
        """
        self.law = APIWrapper()

        try:
            so = Service.objects.get(name="replication")
            config_d = json.loads(so.config)
            self.listener_port = int(config_d["listener_port"])
            nco = NetworkConnection.objects.get(name=config_d["network_interface"])
            self.listener_interface = nco.ipaddr
        except NetworkConnection.DoesNotExist:
            # Configured interface is unknown: listen on all interfaces.
            self.listener_interface = "0.0.0.0"
        except Exception as e:
            msg = (
                "Failed to fetch network interface for Listner/Broker. "
                "Exception: %s" % e.__str__()
            )
            return logger.error(msg)

        try:
            self.uuid = Appliance.objects.get(current_appliance=True).uuid
        except Exception as e:
            msg = (
                "Failed to get uuid of current appliance. Aborting. "
                "Exception: %s" % e.__str__()
            )
            return logger.error(msg)

        ctx = zmq.Context()
        frontend = ctx.socket(zmq.ROUTER)
        frontend.set_hwm(10)
        frontend.bind("tcp://%s:%d" % (self.listener_interface, self.listener_port))

        backend = ctx.socket(zmq.ROUTER)
        backend.bind("ipc://%s" % settings.REPLICATION.get("ipc_socket"))

        poller = zmq.Poller()
        poller.register(frontend, zmq.POLLIN)
        poller.register(backend, zmq.POLLIN)
        self.local_receivers = {}

        # Housekeeping runs once every `iterations` idle polls.
        iterations = 10
        poll_interval = 6000  # 6 seconds
        msg_count = 0
        while True:
            # This loop may still continue even if replication service
            # is terminated, as long as data is coming in.
            socks = dict(poller.poll(timeout=poll_interval))
            if frontend in socks and socks[frontend] == zmq.POLLIN:
                address, command, msg = frontend.recv_multipart()
                # Per-address count of messages seen on the frontend.
                if address not in self.remote_senders:
                    self.remote_senders[address] = 1
                else:
                    self.remote_senders[address] += 1
                msg_count += 1
                # Dump the per-address counters every 1000 messages.
                if msg_count == 1000:
                    msg_count = 0
                    for rs, count in self.remote_senders.items():
                        logger.debug(
                            "Active Receiver: %s. Messages processed:"
                            "%d" % (rs, count)
                        )
                if command == "sender-ready":
                    logger.debug("initial greeting from %s" % address)
                    # Start a new receiver and send the appropriate response
                    try:
                        start_nr = True
                        if address in self.local_receivers:
                            start_nr = False
                            ecode = self.local_receivers[address].exitcode
                            if ecode is not None:
                                # The registered receiver has already
                                # exited; drop it and start a fresh one.
                                del self.local_receivers[address]
                                logger.debug(
                                    "Receiver(%s) exited. exitcode: "
                                    "%s. Forcing removal from broker "
                                    "list." % (address, ecode)
                                )
                                start_nr = True
                            else:
                                msg = (
                                    "Receiver(%s) already exists. "
                                    "Will not start a new one." % address
                                )
                                logger.error(msg)
                                # @todo: There may be a different way to handle
                                # this. For example, we can pass the message to
                                # the active receiver and factor into it's
                                # retry/robust logic. But that is for later.
                                frontend.send_multipart(
                                    [address, "receiver-init-error", msg]
                                )
                        if start_nr:
                            nr = Receiver(address, msg)
                            nr.daemon = True
                            nr.start()
                            logger.debug("New Receiver(%s) started." % address)
                            self.local_receivers[address] = nr
                        continue
                    except Exception as e:
                        msg = (
                            "Exception while starting the "
                            "new receiver for %s: %s" % (address, e.__str__())
                        )
                        logger.error(msg)
                        frontend.send_multipart([address, "receiver-init-error", msg])
                else:
                    # do we hit hwm? is the dealer still connected?
                    backend.send_multipart([address, command, msg])

            elif backend in socks and socks[backend] == zmq.POLLIN:
                address, command, msg = backend.recv_multipart()
                if command == "new-send":
                    rid = int(msg)
                    logger.debug("new-send request received for %d" % rid)
                    # Stays "ERROR" unless the sender is actually started.
                    rcommand = "ERROR"
                    try:
                        replica = Replica.objects.get(id=rid)
                        if replica.enabled:
                            self._process_send(replica)
                            msg = (
                                "A new Sender started successfully for "
                                "Replication Task(%d)." % rid
                            )
                            rcommand = "SUCCESS"
                        else:
                            msg = (
                                "Failed to start a new Sender for "
                                "Replication "
                                "Task(%d) because it is disabled." % rid
                            )
                    except Exception as e:
                        msg = (
                            "Failed to start a new Sender for Replication "
                            "Task(%d). Exception: %s" % (rid, e.__str__())
                        )
                        logger.error(msg)
                    finally:
                        # The requester always gets exactly one reply.
                        backend.send_multipart([address, rcommand, str(msg)])
                elif address in self.remote_senders:
                    if command in (
                        "receiver-ready",
                        "receiver-error",
                        "btrfs-recv-finished",
                    ):  # noqa E501
                        logger.debug("Identitiy: %s command: %s" % (address, command))
                        backend.send_multipart([address, b"ACK", ""])
                        # a new receiver has started. reply to the sender that
                        # must be waiting
                    frontend.send_multipart([address, command, msg])

            else:
                # Neither socket had data within poll_interval.
                iterations -= 1
                if iterations == 0:
                    iterations = 10
                    self._prune_senders()
                    self._delete_receivers()
                    cur_time = time.time()
                    if (
                        self.trail_prune_time is None
                        or (cur_time - self.trail_prune_time) > 3600
                    ):
                        # prune send/receive trails every hour or so.
                        self.trail_prune_time = cur_time
                        # Explicit loops rather than map(): map() is lazy
                        # on Python 3, so calling it only for its side
                        # effects would silently skip the pruning there.
                        for ro in Replica.objects.filter():
                            self.prune_replica_trail(ro)
                        for rso in ReplicaShare.objects.filter():
                            self.prune_receive_trail(rso)
                        logger.debug("Replica trails are truncated successfully.")

                    # A changed parent pid means the process that spawned
                    # this broker is gone.
                    if os.getppid() != self.ppid:
                        logger.error("Parent exited. Aborting.")
                        ctx.destroy()
                        # do some cleanup of senders before quitting?
                        break