def emulate(args):
    """Run the Sender/Receiver emulation and return both logs.

    Starts the Sender and Receiver, sleeps for ``args.RUNTIME`` seconds (or
    until interrupted from the keyboard), terminates both endpoints and then
    drains whatever is still queued on either side.

    ``args.SENDER_TIMEOUT`` and ``args.RUNTIME`` are converted to ``float``
    in place on ``args``; both must be positive.

    Returns a tuple of lists ``(s_log, r_log)``, where ``s_log`` is the
    sender's log and ``r_log`` is the receiver's log.
    """
    args.SENDER_TIMEOUT = float(args.SENDER_TIMEOUT)
    args.RUNTIME = float(args.RUNTIME)
    assert args.SENDER_TIMEOUT > 0
    assert args.RUNTIME > 0

    s = Sender(args.SENDER_TIMEOUT)
    r = Receiver()
    s.set_remote_endpoint(r)
    r.set_remote_endpoint(s)
    r.daemon = True
    s.daemon = True

    # Start both endpoints.
    s.start()
    r.start()

    try:
        time.sleep(args.RUNTIME)
    except KeyboardInterrupt:
        # Call form with one string argument prints the same text on
        # Python 2 and Python 3 (the statement form is a Python 3 syntax
        # error).
        print("Interrupted, terminating.")

    # We have to be careful with terminating the two threads, as they
    # can only exit in specific states, and we can cause a deadlock.
    # First, we terminate the sender, and wait for it to finish. Once
    # this happens, we know that the receiver is in an ok terminal
    # state, so we terminate it right after.
    s.terminate()
    s.join()
    r.terminate()
    r.join()

    # At this point, the sender is not generating any more
    # messages. But, we might have some outstanding messages in
    # receiver's queue. So, process these, if any:
    while not r.rx_queue.empty():
        # Receive msg and generate any outstanding acks.
        r.transition()
        r.transition()
        r.transition()

    # Consume any outstanding acks on the sender's side.
    s.consume_acks()

    return (s.log, r.log)
def emulate(args):
    """Run the Sender/Receiver emulation and return both logs.

    Starts the Sender and Receiver, sleeps for ``args.RUNTIME`` seconds (or
    until interrupted from the keyboard), terminates both endpoints and then
    drains whatever is still queued on either side.

    ``args.SENDER_TIMEOUT`` and ``args.RUNTIME`` are converted to ``float``
    in place on ``args``; both must be positive.

    Returns a tuple of lists ``(s_log, r_log)``, where ``s_log`` is the
    sender's log and ``r_log`` is the receiver's log.
    """
    args.SENDER_TIMEOUT = float(args.SENDER_TIMEOUT)
    args.RUNTIME = float(args.RUNTIME)
    assert args.SENDER_TIMEOUT > 0
    assert args.RUNTIME > 0

    s = Sender(args.SENDER_TIMEOUT)
    r = Receiver()
    s.set_remote_endpoint(r)
    r.set_remote_endpoint(s)
    r.daemon = True
    s.daemon = True

    # Start both endpoints.
    s.start()
    r.start()

    try:
        time.sleep(args.RUNTIME)
    except KeyboardInterrupt:
        # Call form with one string argument prints the same text on
        # Python 2 and Python 3 (the statement form is a Python 3 syntax
        # error).
        print("Interrupted, terminating.")

    # We have to be careful with terminating the two threads, as they
    # can only exit in specific states, and we can cause a deadlock.
    # First, we terminate the sender, and wait for it to finish. Once
    # this happens, we know that the receiver is in an ok terminal
    # state, so we terminate it right after.
    s.terminate()
    s.join()
    r.terminate()
    r.join()

    # At this point, the sender is not generating any more
    # messages. But, we might have some outstanding messages in
    # receiver's queue. So, process these, if any:
    while not r.rx_queue.empty():
        # Receive msg and generate any outstanding acks.
        r.transition()
        r.transition()
        r.transition()

    # Consume any outstanding acks on the sender's side.
    s.consume_acks()

    return (s.log, r.log)
def run(self):
    """Accept client connections and hand each one to a Receiver.

    Creates ``self.done`` (an ``Event``) and keeps accepting connections on
    ``self.server_sock`` until that event is set. Each accepted connection
    is passed to a new ``Receiver`` that is marked as a daemon and started.

    The server socket is closed when the loop ends, including when
    ``accept()`` or the Receiver start-up raises; the exception itself
    still propagates to the caller.
    """
    self.done = Event()
    try:
        while not self.done.is_set():
            print('Waiting for clients')
            client_sock, client_info = self.server_sock.accept()
            r = Receiver(self, client_sock, client_info)
            r.daemon = True
            r.start()
    finally:
        # Always release the listening socket, even on an error path.
        self.server_sock.close()
        print('The server socket has been closed')
def run(self):
    """Run the replication listener/broker loop.

    Setup:
      * reads ``listener_port`` and ``network_interface`` from the
        'replication' Service config; falls back to '0.0.0.0' when the
        configured NetworkConnection does not exist,
      * looks up the uuid of the current Appliance,
      * binds a ROUTER ``frontend`` socket on tcp (remote side) and a
        ROUTER ``backend`` socket on the configured ipc path (local side).
    If either lookup fails with any other exception, the error is logged
    and the method returns early.

    Loop (one poll per iteration, 6 second timeout):
      * frontend message: per-address message counters are updated;
        'sender-ready' starts a new Receiver for that address unless one
        is already registered and has not exited; every other command is
        forwarded to the backend.
      * backend message: 'new-send' starts a Sender for the given Replica
        id and replies SUCCESS/ERROR; 'receiver-ready', 'receiver-error'
        and 'btrfs-recv-finished' from a known address are ACKed on the
        backend and relayed to the frontend.
      * no message: every 10th idle poll prunes senders and receivers,
        prunes replica trails at most once an hour, and leaves the loop
        (destroying the zmq context) when the parent pid has changed.
    """
    self.law = APIWrapper()

    # Resolve the listening port and interface from the service config.
    try:
        so = Service.objects.get(name='replication')
        config_d = json.loads(so.config)
        self.listener_port = int(config_d['listener_port'])
        nco = NetworkConnection.objects.get(
            name=config_d['network_interface'])
        self.listener_interface = nco.ipaddr
    except NetworkConnection.DoesNotExist:
        # Port is already set at this point; only the interface is missing.
        self.listener_interface = '0.0.0.0'
    except Exception as e:
        msg = ('Failed to fetch network interface for Listner/Broker. '
               'Exception: %s' % e.__str__())
        return logger.error(msg)

    try:
        self.uuid = Appliance.objects.get(current_appliance=True).uuid
    except Exception as e:
        msg = ('Failed to get uuid of current appliance. Aborting. '
               'Exception: %s' % e.__str__())
        return logger.error(msg)

    ctx = zmq.Context()
    frontend = ctx.socket(zmq.ROUTER)
    frontend.set_hwm(10)
    frontend.bind('tcp://%s:%d' % (self.listener_interface,
                                   self.listener_port))

    backend = ctx.socket(zmq.ROUTER)
    backend.bind('ipc://%s' % settings.REPLICATION.get('ipc_socket'))

    poller = zmq.Poller()
    poller.register(frontend, zmq.POLLIN)
    poller.register(backend, zmq.POLLIN)
    self.local_receivers = {}

    iterations = 10
    poll_interval = 6000  # 6 seconds
    msg_count = 0
    while True:
        # This loop may still continue even if replication service
        # is terminated, as long as data is coming in.
        socks = dict(poller.poll(timeout=poll_interval))
        if (frontend in socks and socks[frontend] == zmq.POLLIN):
            address, command, msg = frontend.recv_multipart()
            if (address not in self.remote_senders):
                self.remote_senders[address] = 1
            else:
                self.remote_senders[address] += 1
            msg_count += 1
            if (msg_count == 1000):
                # Periodic debug dump of the per-address counters.
                msg_count = 0
                for rs, count in self.remote_senders.items():
                    logger.debug('Active Receiver: %s. Messages processed:'
                                 '%d' % (rs, count))
            if (command == 'sender-ready'):
                logger.debug('initial greeting from %s' % address)
                # Start a new receiver and send the appropriate response
                try:
                    start_nr = True
                    if (address in self.local_receivers):
                        start_nr = False
                        ecode = self.local_receivers[address].exitcode
                        if (ecode is not None):
                            del self.local_receivers[address]
                            logger.debug('Receiver(%s) exited. exitcode: '
                                         '%s. Forcing removal from broker '
                                         'list.' % (address, ecode))
                            start_nr = True
                        else:
                            msg = ('Receiver(%s) already exists. '
                                   'Will not start a new one.' % address)
                            logger.error(msg)
                            # @todo: There may be a different way to handle
                            # this. For example, we can pass the message to
                            # the active receiver and factor into it's
                            # retry/robust logic. But that is for later.
                            frontend.send_multipart(
                                [address, 'receiver-init-error', msg])
                    if (start_nr):
                        nr = Receiver(address, msg)
                        nr.daemon = True
                        nr.start()
                        logger.debug('New Receiver(%s) started.' % address)
                        self.local_receivers[address] = nr
                    continue
                except Exception as e:
                    msg = ('Exception while starting the '
                           'new receiver for %s: %s'
                           % (address, e.__str__()))
                    logger.error(msg)
                    frontend.send_multipart(
                        [address, 'receiver-init-error', msg])
            else:
                # do we hit hwm? is the dealer still connected?
                backend.send_multipart([address, command, msg])

        elif (backend in socks and socks[backend] == zmq.POLLIN):
            address, command, msg = backend.recv_multipart()
            if (command == 'new-send'):
                rid = int(msg)
                logger.debug('new-send request received for %d' % rid)
                rcommand = 'ERROR'
                try:
                    replica = Replica.objects.get(id=rid)
                    if (replica.enabled):
                        self._process_send(replica)
                        msg = ('A new Sender started successfully for '
                               'Replication Task(%d).' % rid)
                        rcommand = 'SUCCESS'
                    else:
                        msg = ('Failed to start a new Sender for '
                               'Replication '
                               'Task(%d) because it is disabled.' % rid)
                except Exception as e:
                    msg = ('Failed to start a new Sender for Replication '
                           'Task(%d). Exception: %s' % (rid, e.__str__()))
                    logger.error(msg)
                finally:
                    # The requester always gets a reply, success or not.
                    backend.send_multipart([address, rcommand, str(msg)])
            elif (address in self.remote_senders):
                if (command in ('receiver-ready', 'receiver-error',
                                'btrfs-recv-finished')):
                    logger.debug('Identitiy: %s command: %s' %
                                 (address, command))
                    backend.send_multipart([address, b'ACK', ''])
                    # a new receiver has started. reply to the sender that
                    # must be waiting
                    frontend.send_multipart([address, command, msg])

        else:
            # Nothing arrived within the poll interval: housekeeping.
            iterations -= 1
            if (iterations == 0):
                iterations = 10
                self._prune_senders()
                self._delete_receivers()
                cur_time = time.time()
                if (self.trail_prune_time is None or
                        (cur_time - self.trail_prune_time) > 3600):
                    # prune send/receive trails every hour or so.
                    self.trail_prune_time = cur_time
                    # Explicit loops: map() is lazy on Python 3, so using
                    # it only for side effects would prune nothing there.
                    for replica in Replica.objects.filter():
                        self.prune_replica_trail(replica)
                    for replica_share in ReplicaShare.objects.filter():
                        self.prune_receive_trail(replica_share)
                    logger.debug('Replica trails are truncated '
                                 'successfully.')

                if (os.getppid() != self.ppid):
                    logger.error('Parent exited. Aborting.')
                    ctx.destroy()
                    # do some cleanup of senders before quitting?
                    break
def run(self):
    """Run the replication listener/broker loop.

    Setup:
      * reads ``listener_port`` and ``network_interface`` from the
        "replication" Service config; falls back to "0.0.0.0" when the
        configured NetworkConnection does not exist,
      * looks up the uuid of the current Appliance,
      * binds a ROUTER ``frontend`` socket on tcp (remote side) and a
        ROUTER ``backend`` socket on the configured ipc path (local side).
    If either lookup fails with any other exception, the error is logged
    and the method returns early.

    Loop (one poll per iteration, 6 second timeout):
      * frontend message: per-address message counters are updated;
        "sender-ready" starts a new Receiver for that address unless one
        is already registered and has not exited; every other command is
        forwarded to the backend.
      * backend message: "new-send" starts a Sender for the given Replica
        id and replies SUCCESS/ERROR; "receiver-ready", "receiver-error"
        and "btrfs-recv-finished" from a known address are ACKed on the
        backend and relayed to the frontend.
      * no message: every 10th idle poll prunes senders and receivers,
        prunes replica trails at most once an hour, and leaves the loop
        (destroying the zmq context) when the parent pid has changed.
    """
    self.law = APIWrapper()

    # Resolve the listening port and interface from the service config.
    try:
        so = Service.objects.get(name="replication")
        config_d = json.loads(so.config)
        self.listener_port = int(config_d["listener_port"])
        nco = NetworkConnection.objects.get(name=config_d["network_interface"])
        self.listener_interface = nco.ipaddr
    except NetworkConnection.DoesNotExist:
        # Port is already set at this point; only the interface is missing.
        self.listener_interface = "0.0.0.0"
    except Exception as e:
        msg = (
            "Failed to fetch network interface for Listner/Broker. "
            "Exception: %s" % e.__str__()
        )
        return logger.error(msg)

    try:
        self.uuid = Appliance.objects.get(current_appliance=True).uuid
    except Exception as e:
        msg = (
            "Failed to get uuid of current appliance. Aborting. "
            "Exception: %s" % e.__str__()
        )
        return logger.error(msg)

    ctx = zmq.Context()
    frontend = ctx.socket(zmq.ROUTER)
    frontend.set_hwm(10)
    frontend.bind("tcp://%s:%d" % (self.listener_interface, self.listener_port))

    backend = ctx.socket(zmq.ROUTER)
    backend.bind("ipc://%s" % settings.REPLICATION.get("ipc_socket"))

    poller = zmq.Poller()
    poller.register(frontend, zmq.POLLIN)
    poller.register(backend, zmq.POLLIN)
    self.local_receivers = {}

    iterations = 10
    poll_interval = 6000  # 6 seconds
    msg_count = 0
    while True:
        # This loop may still continue even if replication service
        # is terminated, as long as data is coming in.
        socks = dict(poller.poll(timeout=poll_interval))
        if frontend in socks and socks[frontend] == zmq.POLLIN:
            address, command, msg = frontend.recv_multipart()
            if address not in self.remote_senders:
                self.remote_senders[address] = 1
            else:
                self.remote_senders[address] += 1
            msg_count += 1
            if msg_count == 1000:
                # Periodic debug dump of the per-address counters.
                msg_count = 0
                for rs, count in self.remote_senders.items():
                    logger.debug(
                        "Active Receiver: %s. Messages processed:"
                        "%d" % (rs, count)
                    )
            if command == "sender-ready":
                logger.debug("initial greeting from %s" % address)
                # Start a new receiver and send the appropriate response
                try:
                    start_nr = True
                    if address in self.local_receivers:
                        start_nr = False
                        ecode = self.local_receivers[address].exitcode
                        if ecode is not None:
                            del self.local_receivers[address]
                            logger.debug(
                                "Receiver(%s) exited. exitcode: "
                                "%s. Forcing removal from broker "
                                "list." % (address, ecode)
                            )
                            start_nr = True
                        else:
                            msg = (
                                "Receiver(%s) already exists. "
                                "Will not start a new one." % address
                            )
                            logger.error(msg)
                            # @todo: There may be a different way to handle
                            # this. For example, we can pass the message to
                            # the active receiver and factor into it's
                            # retry/robust logic. But that is for later.
                            frontend.send_multipart(
                                [address, "receiver-init-error", msg]
                            )
                    if start_nr:
                        nr = Receiver(address, msg)
                        nr.daemon = True
                        nr.start()
                        logger.debug("New Receiver(%s) started." % address)
                        self.local_receivers[address] = nr
                    continue
                except Exception as e:
                    msg = (
                        "Exception while starting the "
                        "new receiver for %s: %s" % (address, e.__str__())
                    )
                    logger.error(msg)
                    frontend.send_multipart([address, "receiver-init-error", msg])
            else:
                # do we hit hwm? is the dealer still connected?
                backend.send_multipart([address, command, msg])

        elif backend in socks and socks[backend] == zmq.POLLIN:
            address, command, msg = backend.recv_multipart()
            if command == "new-send":
                rid = int(msg)
                logger.debug("new-send request received for %d" % rid)
                rcommand = "ERROR"
                try:
                    replica = Replica.objects.get(id=rid)
                    if replica.enabled:
                        self._process_send(replica)
                        msg = (
                            "A new Sender started successfully for "
                            "Replication Task(%d)." % rid
                        )
                        rcommand = "SUCCESS"
                    else:
                        msg = (
                            "Failed to start a new Sender for "
                            "Replication "
                            "Task(%d) because it is disabled." % rid
                        )
                except Exception as e:
                    msg = (
                        "Failed to start a new Sender for Replication "
                        "Task(%d). Exception: %s" % (rid, e.__str__())
                    )
                    logger.error(msg)
                finally:
                    # The requester always gets a reply, success or not.
                    backend.send_multipart([address, rcommand, str(msg)])
            elif address in self.remote_senders:
                if command in (
                    "receiver-ready",
                    "receiver-error",
                    "btrfs-recv-finished",
                ):
                    logger.debug("Identitiy: %s command: %s" % (address, command))
                    backend.send_multipart([address, b"ACK", ""])
                    # a new receiver has started. reply to the sender that
                    # must be waiting
                    frontend.send_multipart([address, command, msg])

        else:
            # Nothing arrived within the poll interval: housekeeping.
            iterations -= 1
            if iterations == 0:
                iterations = 10
                self._prune_senders()
                self._delete_receivers()
                cur_time = time.time()
                if (
                    self.trail_prune_time is None
                    or (cur_time - self.trail_prune_time) > 3600
                ):
                    # prune send/receive trails every hour or so.
                    self.trail_prune_time = cur_time
                    # Explicit loops: map() is lazy on Python 3, so using
                    # it only for side effects would prune nothing there.
                    for replica in Replica.objects.filter():
                        self.prune_replica_trail(replica)
                    for replica_share in ReplicaShare.objects.filter():
                        self.prune_receive_trail(replica_share)
                    logger.debug("Replica trails are truncated successfully.")

                if os.getppid() != self.ppid:
                    logger.error("Parent exited. Aborting.")
                    ctx.destroy()
                    # do some cleanup of senders before quitting?
                    break