def __close_down(self, reply_code, reply_text):
    # Important, so the connection does not get reopened by
    # the on_connection_closed callback.
    # This should only be called by one of the finish methods.

    # Make sure the main thread does not continue blocking
    # as it believes that we're still looking for pending messages:
    self.__tell_publisher_to_stop_waiting_for_gentle_finish()

    # Change the state of the state machine:
    self.statemachine.set_to_permanently_unavailable()
    self.statemachine.set_detail_closed_by_publisher()

    # Close connection
    try:
        if self.thread._connection is not None:
            if self.thread._connection.is_closed or self.thread._connection.is_closing:
                logdebug(LOGGER, 'Connection is closed or closing.')
                # If the connection is already closed, on_connection_closed is not
                # called, so the ioloop continues, possibly waiting for reconnect.
                # So we need to prevent reconnects or other events. As long
                # as the ioloop runs, the thread cannot be finished/joined.
                self.thread.make_permanently_closed_by_user()
            elif self.thread._connection.is_open:
                logdebug(LOGGER, 'Connection is open. Closing now. This will trigger the RabbitMQ callbacks.')
                self.thread._connection.close(reply_code=reply_code, reply_text=reply_text)
                # "If there are any open channels, it will attempt to close them
                # prior to fully disconnecting." (pika docs)
        else:
            logerror(LOGGER, 'Connection was None when trying to close. Synchronization error between threads!')
    except AttributeError as e:
        logdebug(LOGGER, 'AttributeError from pika during connection closedown (%s: %s)',
                 e.__class__.__name__, e.message)
def __log_about_double_return(self, frame, body):
    if not self.__have_warned_about_double_unroutable_already:
        body_json = json.loads(body)
        logerror(LOGGER,
                 'The RabbitMQ node refused a message a second time (with the original routing key "%s" and the emergency routing key "%s"). Dropping the message.',
                 body_json['original_routing_key'], frame.routing_key)
        self.__have_warned_about_double_unroutable_already = True
    logdebug(LOGGER, 'This is the second time the message comes back. Dropping it.')
def on_channel_closed(self, channel, reply_code, reply_text):
    logdebug(LOGGER, 'Channel was closed: %s (code %s)', reply_text, reply_code)

    # Channel closed because user wants to close:
    if self.statemachine.is_PERMANENTLY_UNAVAILABLE():
        if self.statemachine.get_detail_closed_by_publisher():
            logdebug(LOGGER, 'Channel close event due to close command by user. This is expected.')

    # Channel closed because even the fallback exchange did not exist:
    elif reply_code == 404 and "NOT_FOUND - no exchange 'FALLBACK'" in reply_text:
        logerror(LOGGER, 'Channel closed because FALLBACK exchange does not exist. Need to close connection to trigger all the necessary close down steps.')
        self.thread._connection.close() # This will reconnect!

    # Channel closed because the exchange did not exist:
    elif reply_code == 404:
        logdebug(LOGGER, 'Channel closed because the exchange "%s" did not exist.',
                 self.__node_manager.get_exchange_name())
        self.__use_different_exchange_and_reopen_channel()

    # Other unexpected channel close:
    else:
        logerror(LOGGER, 'Unexpected channel shutdown. Need to close connection to trigger all the necessary close down steps.')
        self.thread._connection.close() # This will reconnect!
def on_message_not_accepted(self, channel, returned_frame, props, body):
    # Messages that are returned are confirmed anyway.
    # If we sent 20 messages that are returned, all 20 are acked,
    # so we do not need to retrieve them from the unconfirmed
    # messages after resending.
    # In the end, we'll have published 40 messages and received 40 acks.

    # Logging...
    logtrace(LOGGER, 'Return frame: %s', returned_frame)
    # <Basic.Return(['exchange=rabbitsender_integration_tests', 'reply_code=312', 'reply_text=NO_ROUTE', 'routing_key=cmip6.publisher.HASH.cart.datasets'])>
    logtrace(LOGGER, 'Return props: %s', props)
    # <BasicProperties(['content_type=application/json', 'delivery_mode=2'])>
    logtrace(LOGGER, 'Return body: %s', body)

    # Was it the first or second time it comes back?
    if returned_frame.reply_text == 'NO_ROUTE':
        loginfo(LOGGER, 'The message was returned because it could not be assigned to any queue. No binding for routing key "%s".',
                returned_frame.routing_key)
        if returned_frame.routing_key.startswith(esgfpid.utils.RABBIT_EMERGENCY_ROUTING_KEY):
            self.__log_about_double_return(returned_frame, body)
        else:
            self.__resend_message(returned_frame, props, body)
    else:
        logerror(LOGGER, 'The message was returned. Routing key: %s. Unknown reason: %s',
                 returned_frame.routing_key, returned_frame.reply_text)
        self.__resend_message(returned_frame, props, body)
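# Illustrative sketch (not part of the original class): handlers like
# on_message_not_accepted only ever fire if messages are published with
# mandatory=True and a return callback is registered on the channel.
# A minimal standalone setup, assuming pika's asynchronous channel API
# (the handler name is a placeholder):

def register_return_handling(channel, handler):
    # pika invokes the callback with (channel, method_frame, properties, body)
    # for every message the broker sends back via Basic.Return:
    channel.add_on_return_callback(handler)

def publish_with_return(channel, exchange, routing_key, body):
    # mandatory=True makes the broker return unroutable messages
    # (reply_text 'NO_ROUTE') instead of silently dropping them:
    channel.basic_publish(exchange=exchange,
                          routing_key=routing_key,
                          body=body,
                          mandatory=True)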
def __wait_some_more_and_redecide(self, iteration):
    wait_seconds = defaults.RABBIT_ASYN_FINISH_WAIT_SECONDS
    logdebug(LOGGER, 'Gentle finish (iteration %i): Waiting some more for pending messages...',
             self.__close_decision_iterations)

    # Instead of time.sleep(), add an event to the thread's ioloop:
    self.__close_decision_iterations += 1
    if self.thread._connection is not None:
        self.thread._connection.add_timeout(wait_seconds, self.recursive_decision_about_closing)
        self.__is_in_process_of_gently_closing = True
        # Problem: If a reconnect occurs after this, this event will be lost.
        # I cannot retrieve it from the ioloop and pass it to the new one.
        # So, during a reconnection, we check if the gentle finish was running,
        # and add a new timeout to the new ioloop, using
        # "continue_gently_closing_if_applicable()".
        # This may mess up the amount of time the gentle finish takes, though.
        # TODO Maybe one day it is possible to transfer events from one ioloop
        # to another?
    else:
        logerror(LOGGER, 'Connection was None when trying to wait for pending messages. Synchronization error between threads!')
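# Illustrative sketch (not part of the original class): the non-blocking wait
# pattern used above, reduced to a standalone helper. time.sleep() would block
# the very thread that runs pika's ioloop, freezing heartbeat and publish
# handling; scheduling a callback keeps the loop responsive. Assumes pika 0.x,
# where the connection exposes add_timeout (pika 1.x uses
# connection.ioloop.call_later):

def wait_until(connection, condition, on_done, wait_seconds=0.5):
    # Re-schedules itself on the ioloop until condition() holds,
    # mirroring the recursive_decision_about_closing pattern:
    if condition():
        on_done()
    else:
        connection.add_timeout(
            wait_seconds,
            lambda: wait_until(connection, condition, on_done, wait_seconds))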
def __add_emergency_routing_key(self, body_json):
    emergency_routing_key = esgfpid.utils.RABBIT_EMERGENCY_ROUTING_KEY
    key_for_routing_key = esgfpid.assistant.messages.JSON_KEY_ROUTING_KEY

    # If there was no routing key, set the original one to 'None':
    if key_for_routing_key not in body_json:
        logerror(LOGGER, 'Very unexpected: RabbitMQ returned a message that had no routing key: %s', body_json)
        body_json[key_for_routing_key] = 'None'

    # If it already HAS the emergency routing key, do not adapt the routing key.
    # (This means the message already came back a second time...)
    if body_json[key_for_routing_key] == emergency_routing_key:
        pass

    # Otherwise, store the original one in another field...
    # and overwrite it with the emergency routing key:
    else:
        body_json['original_routing_key'] = body_json[key_for_routing_key]
        logdebug(LOGGER, 'Adding emergency routing key %s', emergency_routing_key)
        body_json[key_for_routing_key] = emergency_routing_key

    return body_json
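# Illustrative sketch (not part of the original class): the same rerouting
# logic as a standalone function, with hypothetical constants 'EMERGENCY' and
# 'ROUTING_KEY' standing in for esgfpid.utils.RABBIT_EMERGENCY_ROUTING_KEY and
# esgfpid.assistant.messages.JSON_KEY_ROUTING_KEY:

def add_emergency_routing_key_sketch(body_json,
                                     emergency_key='EMERGENCY',
                                     key_name='ROUTING_KEY'):
    if key_name not in body_json:
        body_json[key_name] = 'None'
    if body_json[key_name] != emergency_key:
        # Preserve the original key for diagnostics, then reroute:
        body_json['original_routing_key'] = body_json[key_name]
        body_json[key_name] = emergency_key
    return body_json

# Example: add_emergency_routing_key_sketch({'ROUTING_KEY': 'a.b.c'})
# returns {'ROUTING_KEY': 'EMERGENCY', 'original_routing_key': 'a.b.c'}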
def on_connection_error(self, connection, msg):
    oldhost = self.__get_whole_host_name()
    time_passed = datetime.datetime.now() - self.__start_connect_time
    time_passed_seconds = time_passed.total_seconds()
    logerror(LOGGER, 'Could not connect to %s: "%s" (connection failure after %s seconds)',
             oldhost, msg, time_passed_seconds)
    self.__store_connection_error_info(msg, oldhost)

    # If there was a force-finish, we do not reconnect.
    if self.statemachine.is_FORCE_FINISHED():
        errormsg = 'Permanently failed to connect to RabbitMQ.'
        if self.statemachine.detail_asked_to_gently_close_by_publisher:
            errormsg += ' Tried all hosts until it was force-closed by the user.'
        elif self.statemachine.detail_asked_to_force_close_by_publisher:
            errormsg += ' Tried all hosts until a user close-down forced us to give up (e.g. the maximum waiting time was reached).'
        errormsg += ' Giving up. No PID requests will be sent.'
        self.__give_up_reconnecting_and_raise_exception(errormsg)

    # If there are alternative URLs, try one of them:
    if self.__node_manager.has_more_urls():
        logdebug(LOGGER, 'Connection failure: %s fallback URLs left to try.',
                 self.__node_manager.get_num_left_urls())
        self.__node_manager.set_next_host()
        newhost = self.__get_whole_host_name()
        loginfo(LOGGER, 'Connection failure: Trying to connect (now) to %s.', newhost)
        reopen_seconds = 0
        self.__wait_and_trigger_reconnection(connection, reopen_seconds)

    # If there are no URLs left, reset the node manager to
    # start at the first nodes again...
    else:
        self.__reconnect_counter += 1
        if self.__reconnect_counter <= self.__max_reconnection_tries:
            reopen_seconds = self.__wait_seconds_before_reconnect
            logdebug(LOGGER, 'Connection failure: Failed connecting to all hosts. Waiting %s seconds and starting over.', reopen_seconds)
            self.__node_manager.reset_nodes()
            newhost = self.__node_manager.get_connection_parameters().host
            loginfo(LOGGER, 'Connection failure: Trying to connect (in %s seconds) to %s.',
                    reopen_seconds, newhost)
            self.__wait_and_trigger_reconnection(connection, reopen_seconds)

        # Give up after so many tries...
        else:
            errormsg = ('Permanently failed to connect to RabbitMQ. Tried all hosts %s times. '
                        'Giving up. No PID requests will be sent.'
                        % (self.__max_reconnection_tries + 1))
            self.__give_up_reconnecting_and_raise_exception(errormsg)
def continue_gently_closing_if_applicable(self):
    if self.__is_in_process_of_gently_closing:
        logdebug(LOGGER, 'Continue gentle shutdown even after reconnect (iteration %i)...',
                 self.__close_decision_iterations)
        if self.thread._connection is not None:
            wait_seconds = defaults.RABBIT_ASYN_FINISH_WAIT_SECONDS
            self.thread._connection.add_timeout(wait_seconds, self.recursive_decision_about_closing)
        else:
            logerror(LOGGER, 'Connection was None when trying to wait for pending messages (after reconnect). Synchronization error between threads!')
def __resend_message(self, returned_frame, props, body):
    try:
        body_json = json.loads(body)
        body_json = self.__add_emergency_routing_key(body_json)
        self.__resend_an_unroutable_message(json.dumps(body_json))
    except pika.exceptions.ChannelClosed as e:
        logdebug(LOGGER, 'Error during "on_message_not_accepted": %s: %s',
                 e.__class__.__name__, e.message)
        logerror(LOGGER, 'Could not resend message: %s: %s',
                 e.__class__.__name__, e.message)
def __get_json_from_response(self, response):
    try:
        response_json = json.loads(response.content)
        logdebug(LOGGER, 'Solr response ok, returned JSON content.')
        return response_json
    except (ValueError, TypeError) as e:
        msg = 'Error while parsing Solr response. It does not seem to be valid JSON. Message: ' + e.message
        logerror(LOGGER, msg)
        raise esgfpid.exceptions.SolrError(msg)
def on_channel_closed(self, connection, exception):
    # From the docs: The exception will either be an instance of
    # exceptions.ConnectionClosed if a fully-open connection was closed
    # by user or broker or exception of another type (...)
    if isinstance(exception, pika.exceptions.ChannelClosed):
        reply_code = exception.reply_code
        reply_text = exception.reply_text
    else:
        # TODO Not sure when this might happen, could not reproduce.
        reply_code = -1
        reply_text = str(exception)
    logdebug(LOGGER, 'Channel was closed: %s (code %s)', reply_text, reply_code)

    # Channel closed because user wants to close:
    if self.statemachine.is_PERMANENTLY_UNAVAILABLE() or self.statemachine.is_FORCE_FINISHED():
        if self.statemachine.get_detail_closed_by_publisher():
            logdebug(LOGGER, 'Channel close event due to close command by user. This is expected.')

    # Channel closed because even the fallback exchange did not exist:
    elif reply_code == 404 and "NOT_FOUND - no exchange 'FALLBACK'" in reply_text:
        logerror(LOGGER, 'Channel closed because FALLBACK exchange does not exist. Need to close connection to trigger all the necessary close down steps.')
        self.__undo_resetting_reconnect_counter()
        self.thread.reset_exchange_name() # So next host is tried with normal exchange
        self.thread._connection.close() # This will reconnect!
        # TODO: Put a different reply_code and text, so we won't treat this as a Normal Shutdown!

    # Channel closed because the exchange did not exist:
    elif reply_code == 404:
        logdebug(LOGGER, 'Channel closed because the exchange "%s" did not exist.',
                 self.__node_manager.get_exchange_name())
        self.__use_different_exchange_and_reopen_channel()

    # Other unexpected channel close:
    else:
        logerror(LOGGER, 'Unexpected channel shutdown. Need to close connection to trigger all the necessary close down steps.')
        self.__undo_resetting_reconnect_counter()
        self.thread._connection.close() # This will reconnect!
def __join_and_rescue(self):
    success = self.__join()
    if success:
        self.__rescue_leftovers()
    else:
        for i in xrange(10):
            time.sleep(1) # blocking
        loginfo(LOGGER, 'Joining the thread failed once... Retrying.')
        self.__thread.add_event_force_finish()
        success = self.__join()
        if success:
            self.__rescue_leftovers()
        else:
            logerror(LOGGER, 'Joining failed again. No idea why.')
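# Illustrative sketch (not part of the original class): the join-retry pattern
# above, reduced to plain threading. The real __join() is not shown here; this
# assumes it wraps Thread.join() with a timeout and an is_alive() check:

import threading
import time

def join_with_retry(thread, timeout_seconds=2.0, retries=1):
    for _ in range(retries + 1):
        thread.join(timeout_seconds)
        if not thread.is_alive():
            return True   # thread has finished, join succeeded
        time.sleep(1)     # blocking pause before the next attempt
    return False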
def __send_message_to_queue_once(self, routing_key, messagebody):
    delivered = False
    try:
        delivered = self.__do_send_message(routing_key, messagebody, self.__props)
        self.__avoid_connection_shutdown()
    except pika.exceptions.UnroutableError:
        logerror(LOGGER, 'Message could not be routed to any queue, maybe none was declared yet.')
        raise
    return delivered
def get_open_word_for_routing_key(self):

    # Message is published via an open node:
    if self.__current_node['is_open'] == True:
        if self.__has_trusted:
            return 'untrusted-fallback'
        else:
            return 'untrusted-only'

    # Message is published via a trusted node:
    elif self.__current_node['is_open'] == False:
        return 'trusted'

    else:
        logerror(LOGGER, 'Problem: Unsure whether the current node is open or not!')
        return 'untrusted-unsure'
def __give_up_reconnecting_and_raise_exception(self, error_message):
    self.statemachine.set_to_permanently_unavailable()
    self.statemachine.detail_could_not_connect = True
    problem_message = self.__connection_errors_to_string()
    logerror(LOGGER, error_message)
    logdebug(LOGGER, problem_message)
    self.__make_permanently_closed_by_error(
        None, self.thread.ERROR_TEXT_CONNECTION_PERMANENT_ERROR) # Stops ioloop, so thread may stop!
    if not (hasattr(defaults, 'IS_TEST_RUN') and defaults.IS_TEST_RUN == True):
        raise PIDServerException(error_message + '\nProblems:\n' + problem_message)
    else:
        msg = 'PIDServerException would have been raised in real life.'
        logerror(LOGGER, msg)
def __make_ready_for_publishing(self):
    logdebug(LOGGER, '(Re)connection established, making ready for publication...')

    # Check for unexpected errors:
    if self.thread._channel is None:
        logerror(LOGGER, 'Channel is None after connecting to server. This should not happen.')
        self.statemachine.set_to_permanently_unavailable()
    if self.thread._connection is None:
        logerror(LOGGER, 'Connection is None after connecting to server. This should not happen.')
        self.statemachine.set_to_permanently_unavailable()

    # Normally, it should already be waiting to be available:
    if self.statemachine.is_WAITING_TO_BE_AVAILABLE():
        logdebug(LOGGER, 'Setup is finished. Publishing may start.')
        logtrace(LOGGER, 'Publishing will use channel no. %s!', self.thread._channel.channel_number)
        self.statemachine.set_to_available()
        self.__check_for_already_arrived_messages_and_publish_them()

    # It was asked to close in the meantime (but might be able to publish the last messages):
    elif self.statemachine.is_AVAILABLE_BUT_WANTS_TO_STOP():
        logdebug(LOGGER, 'Setup is finished, but the module was already asked to be closed in the meantime.')
        self.__check_for_already_arrived_messages_and_publish_them()

    # It was force-closed in the meantime:
    elif self.statemachine.is_PERMANENTLY_UNAVAILABLE(): # state was set in shutter module's __close_down()
        if self.statemachine.get_detail_closed_by_publisher():
            logdebug(LOGGER, 'Setup is finished now, but the module was already force-closed in the meantime.')
            self.shutter.safety_finish('closed before connection was ready. reclosing.')
        elif self.statemachine.detail_could_not_connect:
            logerror(LOGGER, 'This is not supposed to happen. If the connection failed, this part of the code should not be reached.')
        else:
            logerror(LOGGER, 'This is not supposed to happen. An unknown event set this module to be unavailable. When was this set to unavailable?')
    else:
        logdebug(LOGGER, 'Unexpected state.')
def __add_event(self, event):
    if self._connection is not None:
        self._connection.add_timeout(self.__PUBLISH_INTERVAL_SECONDS, event)
    else:
        # If the main thread wants to add an event so quickly after starting the
        # thread that not even the connection object is listening for events yet,
        # we need to force it to wait.
        # Event listening is the first thing that happens when a thread is started,
        # but e.g. for shopping carts, the main thread just sends one message and
        # then wants to close again.
        # In that case, the thread cannot even receive the close event, as it is
        # not started yet.
        logdebug(LOGGER, 'Main thread wants to add an event to a thread that is not ready to receive events yet. Blocking and waiting.')
        self.__wait_for_thread_to_accept_events()
        logdebug(LOGGER, 'Thread declared itself ready to receive events.')
        self._connection.add_timeout(self.__PUBLISH_INTERVAL_SECONDS, event)
        logdebug(LOGGER, 'Added event after having waited for the thread to open.')
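# Illustrative sketch (not part of the original class): one plausible way the
# "wait for the thread to accept events" handshake could be built, assuming a
# threading.Event (the real __wait_for_thread_to_accept_events and its
# counterpart tell_publisher_to_stop_waiting_for_thread_to_accept_events are
# not shown here):

import threading

class EventHandshakeSketch(object):

    def __init__(self):
        self.__ready = threading.Event()

    def signal_ready(self):
        # Called by the rabbit thread once its connection object exists:
        self.__ready.set()

    def wait_for_thread_to_accept_events(self, timeout_seconds=10):
        # Blocks the main thread until the rabbit thread signals readiness:
        if not self.__ready.wait(timeout_seconds):
            raise RuntimeError('Thread did not become ready to receive events in time.')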
def __log_why_cannot_feed_the_rabbit_now(self):
    log_every_x_times(LOGGER, self.__logcounter_trigger, self.__LOGFREQUENCY,
                      'Cannot publish message to RabbitMQ (trigger no. %i).', self.__logcounter_trigger)

    if self.statemachine.is_WAITING_TO_BE_AVAILABLE():
        logdebug(LOGGER, 'Cannot publish message to RabbitMQ yet, as the connection is not ready.')

    elif self.statemachine.is_NOT_STARTED_YET():
        logerror(LOGGER, 'Cannot publish message to RabbitMQ, as the thread is not running yet.')

    elif self.statemachine.is_PERMANENTLY_UNAVAILABLE() or self.statemachine.is_FORCE_FINISHED():
        if self.statemachine.detail_could_not_connect:
            logtrace(LOGGER, 'Could not publish message to RabbitMQ, as the connection failed.')
            if self.__have_not_warned_about_connection_fail_yet:
                logwarn(LOGGER, 'Could not publish message(s) to RabbitMQ. The connection failed definitively.')
                self.__have_not_warned_about_connection_fail_yet = False
        elif self.statemachine.get_detail_closed_by_publisher():
            logtrace(LOGGER, 'Cannot publish message to RabbitMQ, as the connection was closed by the user.')
            if self.__have_not_warned_about_force_close_yet:
                logwarn(LOGGER, 'Could not publish message(s) to RabbitMQ. The sender was closed by the user.')
                self.__have_not_warned_about_force_close_yet = False

    else:
        if self.thread._channel is None:
            logerror(LOGGER, 'Very unexpected. Could not publish message(s) to RabbitMQ. There is no channel.')
def __wait_and_trigger_reconnection(self, connection, wait_seconds):
    if self.statemachine.is_FORCE_FINISHED():
        # TODO This is the same code as above. Make a give_up function from it?
        #self.statemachine.set_to_permanently_unavailable()
        #self.statemachine.detail_could_not_connect = True
        #max_tries = defaults.RABBIT_RECONNECTION_MAX_TRIES
        errormsg = ('Permanently failed to connect to RabbitMQ. Tried all hosts %s until received a force-finish. '
                    'Giving up. No PID requests will be sent.'
                    % list(self.__all_hosts_that_were_tried))
        logerror(LOGGER, errormsg)
        raise PIDServerException(errormsg)
    else:
        self.statemachine.set_to_waiting_to_be_available()
        loginfo(LOGGER, 'Trying to reconnect to RabbitMQ in %s seconds.', wait_seconds)
        connection.add_timeout(wait_seconds, self.reconnect)
        logtrace(LOGGER, 'Reconnect event added to connection %s (not to %s)',
                 connection, self.thread._connection)
def adapt_routing_key_for_untrusted(self, routing_key):

    # Message is published via an open node:
    if self.__current_node['is_open'] == True:
        if self.__has_trusted:
            return esgfpid.utils.adapt_routing_key_for_untrusted_fallback(routing_key)
        else:
            return esgfpid.utils.adapt_routing_key_for_untrusted(routing_key)

    # Message is published via a trusted node:
    elif self.__current_node['is_open'] == False:
        return routing_key

    else:
        logerror(LOGGER, 'Problem: Unsure whether the current node is open or not!')
        return esgfpid.utils.adapt_routing_key_for_untrusted_fallback(routing_key)
def send_message_to_queue(self, message):
    self.__open_connection_if_not_open()
    routing_key, msg_string = rabbitutils.get_routing_key_and_string_message_from_message_if_possible(message)
    success = False
    error_msg = None
    try:
        success = self.__try_sending_message_several_times(routing_key, msg_string)
    except pika.exceptions.UnroutableError:
        logerror(LOGGER, 'Refused message with routing key "%s".' % routing_key)
        body_json = json.loads(msg_string)
        body_json, new_routing_key = rabbitutils.add_emergency_routing_key(body_json)
        logerror(LOGGER, 'Refused message with routing key "%s". Resending with "%s".'
                 % (routing_key, new_routing_key))
        routing_key = new_routing_key
        try:
            success = self.__try_sending_message_several_times(routing_key, msg_string)
        except pika.exceptions.UnroutableError:
            error_msg = 'The RabbitMQ node refused a message a second time'
            error_msg += ' (with the original routing key "%s" and the emergency routing key "%s").' % (body_json['original_routing_key'], routing_key)
            error_msg += ' Dropping the message.'
            logerror(LOGGER, error_msg)
    if not success:
        raise MessageNotDeliveredException(error_msg, msg_string)
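# Illustrative sketch (not part of the original class): how
# pika.exceptions.UnroutableError arises in this synchronous code path.
# With a BlockingConnection, enabling publisher confirms makes a mandatory
# publish raise UnroutableError when the broker returns the message
# (assuming pika 1.x BlockingChannel semantics):

import pika

def send_once_sketch(params, exchange, routing_key, body):
    connection = pika.BlockingConnection(params)
    channel = connection.channel()
    channel.confirm_delivery()  # required so unroutable messages raise
    try:
        channel.basic_publish(exchange=exchange,
                              routing_key=routing_key,
                              body=body,
                              mandatory=True)
        return True
    except pika.exceptions.UnroutableError:
        return False  # caller may retry with an emergency routing key
    finally:
        connection.close()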
def __check_response_for_error_codes(self, response):
    if response is None:
        msg = 'Solr returned no response (None)'
        logerror(LOGGER, msg)
        raise esgfpid.exceptions.SolrError(msg)
    elif response.status_code == 200:
        if response.content is None:
            msg = 'Solr returned an empty response (with content None)'
            logerror(LOGGER, msg)
            raise esgfpid.exceptions.SolrError(msg)
    elif response.status_code == 404:
        msg = 'Solr returned HTTP 404 (not found)'
        logerror(LOGGER, msg)
        raise esgfpid.exceptions.SolrError(msg)
    else:
        msg = 'Solr replied with code ' + str(response.status_code)
        logerror(LOGGER, msg)
        raise esgfpid.exceptions.SolrError(msg)
def on_connection_error(self, connection, msg):
    oldhost = self.__node_manager.get_connection_parameters().host
    time_passed = datetime.datetime.now() - self.__start_connect_time
    loginfo(LOGGER, 'Failed connection to RabbitMQ at %s after %s seconds. Reason: %s.',
            oldhost, time_passed.total_seconds(), msg)

    # If there was a force-finish, we do not reconnect.
    if self.statemachine.is_FORCE_FINISHED():
        # TODO This is the same code as above. Make a give_up function from it?
        #self.statemachine.set_to_permanently_unavailable()
        #self.statemachine.detail_could_not_connect = True
        errormsg = ('Permanently failed to connect to RabbitMQ. Tried all hosts %s until received a force-finish. '
                    'Giving up. No PID requests will be sent.'
                    % list(self.__all_hosts_that_were_tried))
        logerror(LOGGER, errormsg)
        raise PIDServerException(errormsg)

    # If there are alternative URLs, try one of them:
    if self.__node_manager.has_more_urls():
        logdebug(LOGGER, 'Connection failure: %s fallback URLs left to try.',
                 self.__node_manager.get_num_left_urls())
        self.__node_manager.set_next_host()
        newhost = self.__node_manager.get_connection_parameters().host
        loginfo(LOGGER, 'Connection failure: Trying to connect (now) to %s.', newhost)
        reopen_seconds = 0
        self.__wait_and_trigger_reconnection(connection, reopen_seconds)

    # If there are no URLs left, reset the node manager to
    # start at the first nodes again...
    else:
        self.__reconnect_counter += 1
        if self.__reconnect_counter <= self.__max_reconnection_tries:
            reopen_seconds = self.__wait_seconds_before_reconnect
            logdebug(LOGGER, 'Connection failure: Failed connecting to all hosts. Waiting %s seconds and starting over.', reopen_seconds)
            self.__node_manager.reset_nodes()
            newhost = self.__node_manager.get_connection_parameters().host
            loginfo(LOGGER, 'Connection failure: Trying to connect (in %s seconds) to %s.',
                    reopen_seconds, newhost)
            self.__wait_and_trigger_reconnection(connection, reopen_seconds)

        # Give up after so many tries...
        else:
            self.statemachine.set_to_permanently_unavailable()
            self.statemachine.detail_could_not_connect = True
            errormsg = ('Permanently failed to connect to RabbitMQ. Tried all hosts %s %s times. '
                        'Giving up. No PID requests will be sent.'
                        % (list(self.__all_hosts_that_were_tried), self.__max_reconnection_tries))
            logerror(LOGGER, errormsg)
            raise PIDServerException(errormsg)
def __start_waiting_for_events(self):
    '''
    This waits until the whole chain of callback methods triggered by
    "trigger_connection_to_rabbit_etc()" has finished, and then starts
    waiting for publications. This is done by starting the ioloop.

    Note: In the pika usage example, these things are both called inside
    the run() method, so I wonder if this check-and-wait here is
    necessary. Maybe not. But the usage example does not implement a
    Thread, so it probably blocks during the opening of the connection.
    Here, as it is a different thread, the run() might get called before
    the __init__ has finished? I'd rather stay on the safe side, as my
    experience of threading in Python is limited.
    '''

    # Start ioloop if connection object ready:
    if self.thread._connection is not None:
        try:
            logdebug(LOGGER, 'Starting ioloop...')
            logtrace(LOGGER, 'ioloop is owned by connection %s...', self.thread._connection)

            # Tell the main thread that we're now open for events.
            # As soon as the thread._connection object is not None anymore,
            # it can receive events.
            self.thread.tell_publisher_to_stop_waiting_for_thread_to_accept_events()
            self.thread.continue_gently_closing_if_applicable()
            self.thread._connection.ioloop.start()

        except PIDServerException as e:
            raise e

        # It seems that some connection problems do not cause
        # RabbitMQ to call any callback (on_connection_closed
        # or on_connection_error) - it just silently swallows the
        # problem.
        # So we need to manually trigger reconnection to the next
        # host here, which we do by manually calling the callback.
        # We start the ioloop, so it can handle the reconnection events,
        # or also receive events from the publisher in the meantime.
        except Exception as e:
            # This catches any error during connection startup and during the
            # entire time the ioloop runs, blocks and waits for events.
            time_passed = datetime.datetime.now() - self.__start_connect_time
            time_passed_seconds = time_passed.total_seconds()

            # Some pika errors:
            if isinstance(e, pika.exceptions.ProbableAuthenticationError):
                errorname = self.__make_error_name(e, 'e.g. wrong user or password')
            elif isinstance(e, pika.exceptions.ProbableAccessDeniedError):
                errorname = self.__make_error_name(e, 'e.g. wrong virtual host name')
            elif isinstance(e, pika.exceptions.IncompatibleProtocolError):
                errorname = self.__make_error_name(e, 'e.g. trying TLS/SSL on wrong port')

            # Other errors:
            else:
                errorname = self.__make_error_name(e)

            logdebug(LOGGER, 'Unexpected error during event listener\'s lifetime (after %s seconds): %s',
                     time_passed_seconds, errorname)

            # Now trigger reconnection:
            self.statemachine.set_to_waiting_to_be_available()
            self.on_connection_error(self.thread._connection, errorname)
            self.thread._connection.ioloop.start()

    else:
        # I'm quite sure that this cannot happen, as the connection object
        # is created in "trigger_connection_...()" and thus exists, no matter
        # if the actual connection to RabbitMQ succeeded (yet) or not.
        logdebug(LOGGER, 'This cannot happen: Connection object is not ready.')
        logerror(LOGGER, 'Cannot happen. Cannot properly start the thread. Connection object is not ready.')
def __start_waiting_for_events(self):
    '''
    This waits until the whole chain of callback methods triggered by
    "trigger_connection_to_rabbit_etc()" has finished, and then starts
    waiting for publications. This is done by starting the ioloop.

    Note: In the pika usage example, these things are both called inside
    the run() method, so I wonder if this check-and-wait here is
    necessary. Maybe not. But the usage example does not implement a
    Thread, so it probably blocks during the opening of the connection.
    Here, as it is a different thread, the run() might get called before
    the __init__ has finished? I'd rather stay on the safe side, as my
    experience of threading in Python is limited.
    '''

    # Start ioloop if connection object ready:
    if self.thread._connection is not None:
        try:
            logdebug(LOGGER, 'Starting ioloop...')
            logtrace(LOGGER, 'ioloop is owned by connection %s...', self.thread._connection)

            # Tell the main thread that we're now open for events.
            # As soon as the thread._connection object is not None anymore,
            # it can receive events.
            self.thread.tell_publisher_to_stop_waiting_for_thread_to_accept_events()
            self.thread.continue_gently_closing_if_applicable()
            self.thread._connection.ioloop.start()

        except pika.exceptions.ProbableAuthenticationError as e:
            time_passed = datetime.datetime.now() - self.__start_connect_time
            logerror(LOGGER, 'Caught Authentication Exception after %s seconds during connection ("%s").',
                     time_passed.total_seconds(), e.__class__.__name__)
            self.statemachine.set_to_waiting_to_be_available()
            self.statemachine.detail_authentication_exception = True # TODO WHAT FOR?

            # It seems that ProbableAuthenticationErrors do not cause
            # RabbitMQ to call any callback, either on_connection_closed
            # or on_connection_error - it just silently swallows the
            # problem.
            # So we need to manually trigger reconnection to the next
            # host here, which we do by manually calling the callback.
            errorname = 'ProbableAuthenticationError issued by pika'
            self.on_connection_error(self.thread._connection, errorname)

            # We start the ioloop, so it can handle the reconnection events,
            # or also receive events from the publisher in the meantime.
            self.thread._connection.ioloop.start()

        except Exception as e:
            # This catches any error during connection startup and during the
            # entire time the ioloop runs, blocks and waits for events.
            logerror(LOGGER, 'Unexpected error during event listener\'s lifetime: %s: %s',
                     e.__class__.__name__, e.message)

            # As we will try to reconnect, set the state to waiting to connect.
            # If reconnection fails, it will be set to permanently unavailable.
            self.statemachine.set_to_waiting_to_be_available()

            # In case this error is reached, it seems that no callback
            # was called that handles the problem. Let's try to reconnect
            # somewhere else.
            errorname = 'Unexpected error (' + str(e.__class__.__name__) + ': ' + str(e.message) + ')'
            self.on_connection_error(self.thread._connection, errorname)

            # We start the ioloop, so it can handle the reconnection events,
            # or also receive events from the publisher in the meantime.
            self.thread._connection.ioloop.start()

    else:
        # I'm quite sure that this cannot happen, as the connection object
        # is created in "trigger_connection_...()" and thus exists, no matter
        # if the actual connection to RabbitMQ succeeded (yet) or not.
        logdebug(LOGGER, 'This cannot happen: Connection object is not ready.')
        logerror(LOGGER, 'Cannot happen. Cannot properly start the thread. Connection object is not ready.')
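# Illustrative sketch (not part of the original class): the asynchronous pika
# lifecycle that __start_waiting_for_events participates in, following pika's
# asynchronous publisher example (the exact constructor signature varies
# slightly between pika versions):

import pika

def run_ioloop_sketch(params, on_open, on_open_error, on_closed):
    connection = pika.SelectConnection(parameters=params,
                                       on_open_callback=on_open,
                                       on_open_error_callback=on_open_error,
                                       on_close_callback=on_closed)
    # start() blocks the current thread and dispatches all connection and
    # channel callbacks until the ioloop is stopped:
    connection.ioloop.start()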
def __setup_rabbit_connection(self, params):
    LOGGER.debug('Setting up the connection with the RabbitMQ.')
    self.__start_connect_time = datetime.datetime.now()
    self.__all_hosts_that_were_tried.add(params.host)
    try:
        time_now = get_now_utc_as_formatted_string()
        logdebug(LOGGER, 'Connecting to RabbitMQ at %s... (%s)', params.host, time_now)

        # Make connection
        conn = self.__make_connection(params)

        # Log success
        time_now = get_now_utc_as_formatted_string()
        time_passed = datetime.datetime.now() - self.__start_connect_time
        time_seconds = time_passed.total_seconds()
        loginfo(LOGGER, 'Connection to RabbitMQ at %s opened after %i seconds... (%s)',
                params.host, time_seconds, time_now)
        return conn

    except pika.exceptions.ProbableAuthenticationError as e:
        time_passed = datetime.datetime.now() - self.__start_connect_time
        time_seconds = time_passed.total_seconds()
        error_name = e.__class__.__name__
        logerror(LOGGER, 'Caught Authentication Exception after %s seconds during connection ("%s").',
                 time_seconds, error_name)
        msg = ('Problem setting up the rabbit with username "%s" and password "%s" at url %s.'
               % (params.credentials.username, params.credentials.password, params.host))
        LOGGER.error(msg)
        self.__error_messages_during_init.append(msg)
        return None

    except pika.exceptions.AMQPConnectionError as e:
        time_passed = datetime.datetime.now() - self.__start_connect_time
        time_seconds = time_passed.total_seconds()
        error_name = e.__class__.__name__
        logerror(LOGGER, 'Caught AMQPConnectionError Exception after %s seconds during connection ("%s").',
                 time_seconds, error_name)
        msg = ('Problem setting up the rabbit connection to %s.' % params.host)
        self.__error_messages_during_init.append(msg)
        return None

    except Exception as e:
        time_passed = datetime.datetime.now() - self.__start_connect_time
        time_seconds = time_passed.total_seconds()
        error_name = e.__class__.__name__
        logerror(LOGGER, 'Error ("%s") during connection to %s, after %s seconds.',
                 error_name, params.host, time_seconds)
        msg = ('Unexpected problem setting up the rabbit connection to %s (%s)'
               % (params.host, error_name))
        self.__error_messages_during_init.append(msg)
        raise e
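# Illustrative sketch (not part of the original class): what __make_connection
# presumably wraps, assuming the synchronous pika API (the real helper is not
# shown here). BlockingConnection raises ProbableAuthenticationError and
# AMQPConnectionError on failure, matching the except clauses above:

import pika

def make_connection_sketch(host, username, password):
    credentials = pika.PlainCredentials(username, password)
    params = pika.ConnectionParameters(host=host, credentials=credentials)
    return pika.BlockingConnection(params)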
def open_rabbit_connection(self):
    continue_connecting = True
    while continue_connecting:
        success = self.__try_connecting_to_next()
        if success:
            continue_connecting = False
        else:
            # Log failure:
            oldhost = self.__nodemanager.get_connection_parameters().host
            time_passed = datetime.datetime.now() - self.__start_connect_time
            loginfo(LOGGER, 'Failed connection to RabbitMQ at %s after %s seconds.',
                    oldhost, time_passed.total_seconds())

            # If there are alternative URLs, try one of them:
            if self.__nodemanager.has_more_urls():
                logdebug(LOGGER, 'Connection failure: %s fallback URLs left to try.',
                         self.__nodemanager.get_num_left_urls())
                self.__nodemanager.set_next_host()
                newhost = self.__nodemanager.get_connection_parameters().host
                loginfo(LOGGER, 'Next connection attempt (now) to %s.', newhost)

            # If there are no URLs left, reset the node manager to
            # start at the first nodes again...
            else:
                self.__reconnect_counter += 1
                if self.__reconnect_counter <= self.__max_reconnection_tries:
                    reopen_seconds = self.__wait_seconds_before_reconnect
                    logdebug(LOGGER, 'Connection failure: Failed connecting to all hosts. Waiting %s seconds and starting over.', reopen_seconds)
                    self.__nodemanager.reset_nodes()
                    newhost = self.__nodemanager.get_connection_parameters().host
                    loginfo(LOGGER, 'Next connection attempt (in %s seconds) to %s.',
                            reopen_seconds, newhost)
                    time.sleep(reopen_seconds)

                # Give up after so many tries...
                else:
                    continue_connecting = False
                    errormsg = ('Permanently failed to connect to RabbitMQ. Tried all hosts %s %s times. '
                                'Giving up. No PID requests will be sent.'
                                % (list(self.__all_hosts_that_were_tried), self.__max_reconnection_tries))
                    logerror(LOGGER, errormsg)
                    collected_errors = ' - '.join(self.__error_messages_during_init)
                    logwarn(LOGGER, 'No connection possible. Errors: %s' % collected_errors)
                    raise PIDServerException(errormsg)