def setUp(self):
    """Provision a fresh test customer (with API key) in one transaction."""
    # start from a clean slate before building fixtures
    self.tearDown()
    connection = get_central_connection()
    self._connection = connection
    connection.begin_transaction()
    purge_customer(connection, _test_username)
    create_customer(connection, _test_username)
    add_key_to_customer(connection, _test_username)
    connection.commit()
def _load_collection_id_queue(collection_id_queue):
    """ load every known collection into the queue

    Appends the id of every row in nimbusio_central.collection,
    ordered by id, to *collection_id_queue*.
    """
    connection = get_central_connection()
    try:
        rows = connection.fetch_all_rows(
            "select id from nimbusio_central.collection order by id")
    finally:
        # close even if the query raises, so the connection is not leaked
        connection.close()
    for (collection_id, ) in rows:
        collection_id_queue.append(collection_id)
def _load_collection_id_queue(collection_id_queue):
    """ load every known collection into the queue

    Appends the id of every row in nimbusio_central.collection,
    ordered by id, to *collection_id_queue*.
    """
    connection = get_central_connection()
    try:
        rows = connection.fetch_all_rows(
            "select id from nimbusio_central.collection order by id"
        )
    finally:
        # close even if the query raises, so the connection is not leaked
        connection.close()
    for (collection_id, ) in rows:
        collection_id_queue.append(collection_id)
def delete_all_motoboto_test_segments(): central_conn = get_central_connection() local_conn = get_node_local_connection() collection_id_rows = central_conn.fetch_all_rows(_test_collections_query, []) central_conn.close() local_conn.begin_transaction() local_conn.execute("create temp table tmp_motoboto_collection_ids (id int4 not null)", []) for row in collection_id_rows: local_conn.execute("insert into tmp_motoboto_collection_ids values (%s)", row) for query in _delete_test_collections_data: rowcount = local_conn.execute(query, []) if rowcount: print "Deleted %s via %s" % (rowcount, query.split("\n", 1)[0]) local_conn.commit()
def main():
    """ main entry point

    Dispatches the command selected on the command line, committing the
    central database transaction on success and rolling back on error.
    Returns 0 for success (exit code); re-raises any exception.
    """
    options = _parse_command_line()
    connection = get_central_connection()
    # explicitly open the transaction so the commit/rollback below
    # delimit exactly the dispatched command's work (consistent with
    # the other command-line entry point in this codebase)
    connection.begin_transaction()
    try:
        _dispatch_table[options.command](connection, options)
    except Exception:
        connection.rollback()
        raise
    else:
        connection.commit()
    finally:
        connection.close()

    return 0
def main():
    """Run the command named on the command line inside one transaction.

    Commits on success, rolls back (and re-raises) on failure; the
    connection is always closed. Returns 0 as the process exit code.
    """
    options = _parse_command_line()
    connection = get_central_connection()
    connection.begin_transaction()
    try:
        command_handler = _dispatch_table[options.command]
        command_handler(connection, options)
    except Exception:
        # undo any partial work before propagating the error
        connection.rollback()
        raise
    else:
        connection.commit()
    finally:
        connection.close()
    return 0
def get_versioned_collections():
    """ return the set of all collection ids for which versioning is true """
    log = logging.getLogger("get_versioned_collections")
    versioned_collections = set()
    connection = get_central_connection()
    try:
        for (collection_id, ) in connection.fetch_all_rows(_query, []):
            versioned_collections.add(collection_id)
    finally:
        connection.close()
    # fixed typo in log message: "collectons" -> "collections"
    log.info("found {0} versioned collections".format(
        len(versioned_collections)))
    return versioned_collections
def get_versioned_collections():
    """ return the set of all collection ids for which versioning is true """
    log = logging.getLogger("get_versioned_collections")
    versioned_collections = set()
    connection = get_central_connection()
    try:
        for (collection_id, ) in connection.fetch_all_rows(_query, []):
            versioned_collections.add(collection_id)
    finally:
        connection.close()
    # fixed typo in log message: "collectons" -> "collections"
    log.info("found {0} versioned collections".format(
        len(versioned_collections)
    ))
    return versioned_collections
def delete_all_motoboto_test_segments(): central_conn = get_central_connection() local_conn = get_node_local_connection() collection_id_rows = central_conn.fetch_all_rows(_test_collections_query, []) central_conn.close() local_conn.begin_transaction() local_conn.execute( "create temp table tmp_motoboto_collection_ids (id int4 not null)", []) for row in collection_id_rows: local_conn.execute( "insert into tmp_motoboto_collection_ids values (%s)", row) for query in _delete_test_collections_data: rowcount = local_conn.execute(query, []) if rowcount: print "Deleted %s via %s" % ( rowcount, query.split("\n", 1)[0], ) local_conn.commit()
def _setup(_halt_event, state): log = logging.getLogger("_setup") # do the event push client first, because we may need to # push an execption event from setup state["event-push-client"] = EventPushClient(state["zmq-context"], "data_writer") log.info("binding resilient-server to %s" % (_data_writer_address,)) state["resilient-server"] = ResilientServer(state["zmq-context"], _data_writer_address, state["receive-queue"]) state["resilient-server"].register(state["pollster"]) state["queue-dispatcher"] = DequeDispatcher(state, state["receive-queue"], _dispatch_table) central_connection = get_central_connection() state["cluster-row"] = get_cluster_row(central_connection) state["node-rows"] = get_node_rows(central_connection, state["cluster-row"].id) central_connection.close() state["node-id-dict"] = dict([(node_row.name, node_row.id) for node_row in state["node-rows"]]) state["database-connection"] = get_node_local_connection() # Ticket #1646 mark output value files as closed at startup mark_value_files_as_closed(state["database-connection"]) state["writer"] = Writer(state["database-connection"], _repository_path) state["stats-reporter"] = StatsReporter(state) state["event-push-client"].info("program-start", "data_writer starts") return [ (state["pollster"].run, time.time()), (state["queue-dispatcher"].run, time.time()), (state["stats-reporter"].run, state["stats-reporter"].next_run()), ]
def __init__(self, transaction=True):
    """Open a central database connection.

    When *transaction* is true (the default), immediately issue BEGIN so
    the caller's subsequent statements run inside one transaction.
    """
    self._log = logging.getLogger("SpaceAccountingDatabase")
    self._connection = get_central_connection()
    if not transaction:
        return
    self._connection.execute("BEGIN;")
def main():
    """
    main entry point

    Flush recent per-collection Redis stats from every storage node into
    the central database, deduplicating against previously flushed keys,
    then delete the processed keys from Redis.

    return 0 for success (exit code)
    """
    initialize_logging(_log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    event_push_client = EventPushClient(zeromq_context,
                                        "redis_stats_collector")
    event_push_client.info("program-start", "flush_stats_from_redis starts")

    # don't flush anything newer than 1 minute ago
    current_time = datetime.utcnow()
    timestamp_cutoff = current_time - timedelta(minutes=1)

    return_code = 0
    central_db_connection = None

    # rows destined for collection_ops_accounting
    collection_ops_accounting_rows = list()

    # values to be added to the dedupe table
    new_dedupes = list()

    # keys to be deleted (a list for each node
    node_keys_processed = [list() for _ in _node_names]

    try:
        central_db_connection = get_central_connection()

        # On startup, the program connects to the central database and tries
        # to acquire a pg_advisory_lock appropriate for this program and the
        # data center it is running in using the pg_try_advisory_lock
        # function.
        # If it cannot acquire the lock, it notes the status of the lock
        # and exits. This central locking mechanism lets us avoid single
        # points of failure by configuring the program to run on multiple
        # nodes.
        with advisory_lock(central_db_connection, "redis_stats_collector"):
            node_dict = _retrieve_node_dict(central_db_connection)
            for node_name, keys_processed in \
                zip(_node_names, node_keys_processed):
                node_id = node_dict[node_name]
                log.debug("processing node {0} node_id={1}".format(node_name,
                                                                   node_id))

                # The program then selects into memory all recently collected
                # keys from the central database table
                # collection_ops_accounting_flush_dedupe and stores them in a
                # dedupe set. This set allows runs of the collection/flush
                # program to be idempotent across some time period (
                # but we won't keep the list of old keys forever.)
                dedupe_set = _retrieve_dedupe_set(central_db_connection,
                                                  node_id)

                # The program then visits the Redis instance on every storage
                # node in the local data center, collecting the data from all
                # past stats keys -- aggregating it into the program's memory.
                # The aggregation should involve buckets for each
                # storage_node_id and redis key, corresponding to the columns
                # in the database.
                _process_one_node(node_name,
                                  node_dict[node_name],
                                  timestamp_cutoff,
                                  dedupe_set,
                                  collection_ops_accounting_rows,
                                  new_dedupes,
                                  keys_processed)

            # After collecting past keys from every storage node,
            # inside a central database transaction:
            # 1. Insert the collected stats into the central database
            #    collection_ops_accounting
            # 2. Insert collected keys into recently collected keys
            #    collection_ops_accounting_flush_dedupe.
            # 3. commit transaction
            log.debug("updating central database")
            central_db_connection.begin_transaction()
            try:
                _insert_accounting_rows(central_db_connection,
                                        collection_ops_accounting_rows)
                _insert_dedupe_rows(central_db_connection,
                                    timestamp_cutoff,
                                    new_dedupes)
            except Exception:
                central_db_connection.rollback()
                raise
            else:
                central_db_connection.commit()

            # Then revisit the Redis nodes, and delete the keys we flushed
            # into the database, and any keys we skipped because they were
            # found in the dedupe set.
            # NOTE(review): assumed to run while the advisory lock is still
            # held -- confirm against the original indentation
            for node_name, keys_processed in zip(_node_names,
                                                 node_keys_processed):
                _remove_processed_keys(node_name, keys_processed)

    except Exception as instance:
        log.exception("Uhandled exception {0}".format(instance))
        event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__
        )
        return_code = 1

    if central_db_connection is not None:
        central_db_connection.close()

    event_push_client.close()
    zeromq_context.term()
    log.info("program terminates return_code = {0}".format(return_code))
    return return_code
def __init__(self):
    """Assemble the web internal reader service.

    Opens the central and node-local database connections, builds the
    zeromq messaging plumbing (pull server, one resilient client plus
    DataReader per node, space accounting clients, event push client),
    and constructs the WSGI application and server.
    """
    self._log = logging.getLogger("WebInternalReader")

    memcached_client = memcache.Client(_memcached_nodes)

    self._central_connection = get_central_connection()
    self._cluster_row = get_cluster_row(self._central_connection)
    self._node_local_connection = get_node_local_connection()
    self._deliverator = Deliverator()

    self._zeromq_context = zmq.Context()

    self._pull_server = GreenletPULLServer(
        self._zeromq_context,
        _web_internal_reader_pipeline_address,
        self._deliverator
    )
    self._pull_server.link_exception(self._unhandled_greenlet_exception)

    # one resilient client + DataReader per storage node
    self._data_reader_clients = list()
    self._data_readers = list()
    for node_name, address in zip(_node_names, _data_reader_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_internal_reader_pipeline_address,
            self._deliverator,
            connect_messages=[]
        )
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_reader_clients.append(resilient_client)
        data_reader = DataReader(
            node_name, resilient_client
        )
        self._data_readers.append(data_reader)

    self._space_accounting_dealer_client = GreenletDealerClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_server_address
    )
    self._space_accounting_dealer_client.link_exception(
        self._unhandled_greenlet_exception
    )

    push_client = GreenletPUSHClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_pipeline_address,
    )

    self._accounting_client = SpaceAccountingClient(
        _local_node_name,
        self._space_accounting_dealer_client,
        push_client
    )

    self._event_push_client = EventPushClient(
        self._zeromq_context,
        "web-internal-reader"
    )

    # message sent to data readers telling them the server
    # is (re)starting, thereby invalidating any archives or retrieves
    # that are in progress for this node
    timestamp = create_timestamp()
    self._event_push_client.info("web-reader-start",
                                 "web reader (re)start",
                                 timestamp_repr=repr(timestamp),
                                 source_node_name=_local_node_name)

    self._watcher = Watcher(
        _stats,
        self._data_reader_clients,
        self._event_push_client
    )

    self.application = Application(
        memcached_client,
        self._central_connection,
        self._node_local_connection,
        self._cluster_row,
        self._data_readers,
        self._accounting_client,
        self._event_push_client,
        _stats
    )
    self.wsgi_server = WSGIServer(
        (_web_internal_reader_host, _web_internal_reader_port),
        application=self.application,
        backlog=_wsgi_backlog
    )
def __init__(self):
    """Assemble the web internal reader service.

    Opens the central and node-local database connections, builds the
    zeromq messaging plumbing (pull server, one resilient client plus
    DataReader per node, space accounting clients, event push client),
    and constructs the WSGI application and server.
    """
    self._log = logging.getLogger("WebInternalReader")

    memcached_client = memcache.Client(_memcached_nodes)

    self._central_connection = get_central_connection()
    self._cluster_row = get_cluster_row(self._central_connection)
    self._node_local_connection = get_node_local_connection()
    self._deliverator = Deliverator()

    self._zeromq_context = zmq.Context()

    self._pull_server = GreenletPULLServer(
        self._zeromq_context,
        _web_internal_reader_pipeline_address,
        self._deliverator)
    self._pull_server.link_exception(self._unhandled_greenlet_exception)

    # one resilient client + DataReader per storage node
    self._data_reader_clients = list()
    self._data_readers = list()
    for node_name, address in zip(_node_names, _data_reader_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_internal_reader_pipeline_address,
            self._deliverator,
            connect_messages=[])
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_reader_clients.append(resilient_client)
        data_reader = DataReader(node_name, resilient_client)
        self._data_readers.append(data_reader)

    self._space_accounting_dealer_client = GreenletDealerClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_server_address)
    self._space_accounting_dealer_client.link_exception(
        self._unhandled_greenlet_exception)

    push_client = GreenletPUSHClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_pipeline_address, )

    self._accounting_client = SpaceAccountingClient(
        _local_node_name,
        self._space_accounting_dealer_client,
        push_client)

    self._event_push_client = EventPushClient(self._zeromq_context,
                                              "web-internal-reader")

    # message sent to data readers telling them the server
    # is (re)starting, thereby invalidating any archives or retrieves
    # that are in progress for this node
    timestamp = create_timestamp()
    self._event_push_client.info("web-reader-start",
                                 "web reader (re)start",
                                 timestamp_repr=repr(timestamp),
                                 source_node_name=_local_node_name)

    self._watcher = Watcher(_stats,
                            self._data_reader_clients,
                            self._event_push_client)

    self.application = Application(memcached_client,
                                   self._central_connection,
                                   self._node_local_connection,
                                   self._cluster_row,
                                   self._data_readers,
                                   self._accounting_client,
                                   self._event_push_client,
                                   _stats)
    self.wsgi_server = WSGIServer(
        (_web_internal_reader_host, _web_internal_reader_port),
        application=self.application,
        backlog=_wsgi_backlog)
def _setup(_halt_event, state):
    """Initialize the handoff server.

    Loads cluster/node rows from the central database, binds the local
    resilient server and pull server, creates resilient clients for the
    other nodes' handoff servers and for every node's data reader and
    data writer, and returns the (callback, next_run_time) pairs for the
    timer-driven loop.
    """
    log = logging.getLogger("_setup")
    # collected (client.run, start-time) pairs, appended to the returned
    # callbacks so client status checks are staggered
    status_checkers = list()

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "handoff_server"
    )

    # the central connection is only needed during startup
    central_connection = get_central_connection()
    state["cluster-row"] = get_cluster_row(central_connection)
    state["node-rows"] = get_node_rows(
        central_connection, state["cluster-row"].id
    )
    central_connection.close()

    # name <-> id lookup maps for the cluster's nodes
    state["node-id-dict"] = dict(
        [(node_row.name, node_row.id, ) for node_row in state["node-rows"]]
    )
    state["node-name-dict"] = dict(
        [(node_row.id, node_row.name, ) for node_row in state["node-rows"]]
    )

    state["database-connection"] = get_node_local_connection()

    # serve handoffs locally; connect as a client to every other node
    for node_row, handoff_server_address in zip(
        state["node-rows"], _handoff_server_addresses
    ):
        if node_row.name == _local_node_name:
            log.info("binding resilient-server to %s" % (
                handoff_server_address, ))
            state["resilient-server"] = ResilientServer(
                state["zmq-context"],
                handoff_server_address,
                state["receive-queue"]
            )
            state["resilient-server"].register(state["pollster"])
        else:
            handoff_server_client = ResilientClient(
                state["zmq-context"],
                state["pollster"],
                node_row.name,
                handoff_server_address,
                _client_tag,
                _handoff_server_pipeline_address
            )
            state["handoff-server-clients"].append(handoff_server_client)
            # don't run all the status checkers at the same time
            status_checkers.append(
                (handoff_server_client.run,
                 time.time() + random.random() * 60.0, )
            )

    log.info("binding pull-server to %s" % (
        _handoff_server_pipeline_address, ))
    state["pull-server"] = PULLServer(
        state["zmq-context"],
        _handoff_server_pipeline_address,
        state["receive-queue"]
    )
    state["pull-server"].register(state["pollster"])

    # a resilient client for every node's data reader
    for node_row, data_reader_address in zip(
        state["node-rows"], _data_reader_addresses
    ):
        data_reader_client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_row.name,
            data_reader_address,
            _client_tag,
            _handoff_server_pipeline_address
        )
        state["reader-client-dict"][data_reader_client.server_node_name] = \
            data_reader_client
        # don't run all the status checkers at the same time
        status_checkers.append(
            (data_reader_client.run, time.time() + random.random() * 60.0, )
        )

    # a resilient client for every node's data writer
    for node_row, data_writer_address in zip(
        state["node-rows"], _data_writer_addresses
    ):
        data_writer_client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_row.name,
            data_writer_address,
            _client_tag,
            _handoff_server_pipeline_address
        )
        state["writer-client-dict"][data_writer_client.server_node_name] = \
            data_writer_client
        # don't run all the status checkers at the same time
        status_checkers.append(
            (data_writer_client.run, time.time() + random.random() * 60.0, )
        )

    state["queue-dispatcher"] = DequeDispatcher(
        state,
        state["receive-queue"],
        _dispatch_table
    )

    state["handoff-requestor"] = HandoffRequestor(state, _local_node_name)
    state["handoff-starter"] = HandoffStarter(
        state, _local_node_name, state["event-push-client"]
    )

    state["event-push-client"].info("program-start", "handoff_server starts")

    timer_driven_callbacks = [
        (state["handoff-starter"].run, state["handoff-starter"].next_run(), ),
        (state["pollster"].run, time.time(), ),
        (state["queue-dispatcher"].run, time.time(), ),
        # try to spread out handoff polling, if all nodes start together
        (state["handoff-requestor"].run,
         time.time() + random.random() * handoff_polling_interval)
    ]
    timer_driven_callbacks.extend(status_checkers)
    return timer_driven_callbacks
def __init__(self):
    """Assemble the web server service.

    Opens database connections, creates the unified-id factory, builds
    the zeromq plumbing (pull server, resilient clients for every node's
    data writer and reader, space accounting clients, event push client),
    loads the id translator keys, and constructs the WSGI application
    and server.
    """
    self._log = logging.getLogger("WebServer")
    authenticator = SqlAuthenticator()

    self._central_connection = get_central_connection()
    self._cluster_row = get_cluster_row(self._central_connection)
    self._node_local_connection = get_node_local_connection()
    self._unified_id_factory = UnifiedIDFactory(
        self._central_connection,
        _get_shard_id(self._central_connection, self._cluster_row.id)
    )
    self._deliverator = Deliverator()

    self._zeromq_context = zmq.Context()

    self._pull_server = GreenletPULLServer(
        self._zeromq_context,
        _web_server_pipeline_address,
        self._deliverator
    )
    self._pull_server.link_exception(self._unhandled_greenlet_exception)

    # message sent to data readers and writers telling them the server
    # is (re)starting, thereby invalidating any archives or retrieves
    # that are in progress for this node
    timestamp = create_timestamp()
    start_message = {
        "message-type"     : "web-server-start",
        "priority"         : create_priority(),
        "unified-id"       : self._unified_id_factory.next(),
        "timestamp-repr"   : repr(timestamp),
        "source-node-name" : _local_node_name,
    }

    # a resilient client per node's data writer, primed with the
    # start message
    self._data_writer_clients = list()
    for node_name, address in zip(_node_names, _data_writer_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_server_pipeline_address,
            self._deliverator,
            connect_messages=[start_message, ]
        )
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_writer_clients.append(resilient_client)

    # a resilient client + DataReader per node's data reader
    self._data_reader_clients = list()
    self._data_readers = list()
    for node_name, address in zip(_node_names, _data_reader_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_server_pipeline_address,
            self._deliverator,
            connect_messages=[start_message, ]
        )
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_reader_clients.append(resilient_client)
        data_reader = DataReader(
            node_name, resilient_client
        )
        self._data_readers.append(data_reader)

    self._space_accounting_dealer_client = GreenletDealerClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_server_address
    )
    self._space_accounting_dealer_client.link_exception(
        self._unhandled_greenlet_exception
    )

    push_client = GreenletPUSHClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_pipeline_address,
    )

    self._accounting_client = SpaceAccountingClient(
        _local_node_name,
        self._space_accounting_dealer_client,
        push_client
    )

    self._event_push_client = EventPushClient(
        self._zeromq_context,
        "web-server"
    )

    self._watcher = Watcher(
        _stats,
        self._data_reader_clients,
        self._data_writer_clients,
        self._event_push_client
    )

    # keys for translating internal ids to/from opaque external ids
    id_translator_keys_path = os.path.join(
        _repository_path, "id_translator_keys.pkl"
    )
    with open(id_translator_keys_path, "r") as input_file:
        id_translator_keys = pickle.load(input_file)

    self._id_translator = InternalIDTranslator(
        id_translator_keys["key"],
        id_translator_keys["hmac_key"],
        id_translator_keys["iv_key"],
        id_translator_keys["hmac_size"]
    )
    self.application = Application(
        self._central_connection,
        self._node_local_connection,
        self._cluster_row,
        self._unified_id_factory,
        self._id_translator,
        self._data_writer_clients,
        self._data_readers,
        authenticator,
        self._accounting_client,
        self._event_push_client,
        _stats
    )
    self.wsgi_server = WSGIServer(
        (_web_server_host, _web_server_port),
        application=self.application,
        backlog=_wsgi_backlog
    )
def setUp(self):
    """Start a full per-node process fixture for handoff server tests.

    For every node: event publisher, data writer, data reader, and
    handoff server processes. Then builds the local zeromq client side
    (pollster, pull server, resilient clients to two randomly chosen
    backup nodes) and a DataWriterHandoffClient.
    """
    if not hasattr(self, "_log"):
        self._log = logging.getLogger("TestHandoffServer")

    self.tearDown()
    database_connection = get_central_connection()
    cluster_row = get_cluster_row(database_connection)
    node_rows = get_node_rows(database_connection, cluster_row.id)
    database_connection.close()
    self._key_generator = generate_key()

    self._event_publisher_processes = list()
    self._data_writer_processes = list()
    self._data_reader_processes = list()
    self._handoff_server_processes = list()

    for i in xrange(_node_count):
        node_name = _generate_node_name(i)
        repository_path = _repository_path(node_name)
        os.makedirs(repository_path)

        process = start_event_publisher(node_name,
                                        _event_publisher_pull_addresses[i],
                                        _event_publisher_pub_addresses[i])
        # poll_process returns None while the process is still running
        poll_result = poll_process(process)
        self.assertEqual(poll_result, None)
        self._event_publisher_processes.append(process)
        time.sleep(1.0)

        process = start_data_writer(_cluster_name,
                                    node_name,
                                    _data_writer_addresses[i],
                                    _event_publisher_pull_addresses[i],
                                    repository_path)
        poll_result = poll_process(process)
        self.assertEqual(poll_result, None)
        self._data_writer_processes.append(process)
        time.sleep(1.0)

        process = start_data_reader(node_name,
                                    _data_reader_addresses[i],
                                    _event_publisher_pull_addresses[i],
                                    repository_path)
        poll_result = poll_process(process)
        self.assertEqual(poll_result, None)
        self._data_reader_processes.append(process)
        time.sleep(1.0)

        process = start_handoff_server(
            _cluster_name,
            node_name,
            _handoff_server_addresses,
            _handoff_server_pipeline_addresses[i],
            _data_reader_addresses,
            _data_writer_addresses,
            _event_publisher_pull_addresses[i],
            _repository_path(node_name))
        poll_result = poll_process(process)
        self.assertEqual(poll_result, None)
        self._handoff_server_processes.append(process)
        time.sleep(1.0)

    self._context = zmq.context.Context()

    self._pollster = GreenletZeroMQPollster()
    self._deliverator = Deliverator()

    self._pull_server = GreenletPULLServer(self._context,
                                           _client_address,
                                           self._deliverator)
    self._pull_server.register(self._pollster)

    # pick two nodes (excluding the first) to act as handoff backups
    backup_nodes = random.sample(node_rows[1:], 2)
    self._log.debug("backup nodes = %s" % ([n.name for n in backup_nodes], ))

    self._resilient_clients = list()
    for node_row, address in zip(node_rows, _data_writer_addresses):
        if not node_row in backup_nodes:
            continue
        resilient_client = GreenletResilientClient(
            self._context,
            self._pollster,
            node_row.name,
            address,
            _local_node_name,
            _client_address,
            self._deliverator,
        )
        self._resilient_clients.append(resilient_client)
    self._log.debug("%s resilient clients" % (
        len(self._resilient_clients), ))

    self._data_writer_handoff_client = DataWriterHandoffClient(
        node_rows[0].name,
        self._resilient_clients)

    self._pollster.start()
def setUp(self):
    """Start a full per-node process fixture for handoff server tests.

    For every node: event publisher, data writer, data reader, and
    handoff server processes. Then builds the local zeromq client side
    (pollster, pull server, resilient clients to two randomly chosen
    backup nodes) and a DataWriterHandoffClient.
    """
    if not hasattr(self, "_log"):
        self._log = logging.getLogger("TestHandoffServer")

    self.tearDown()
    database_connection = get_central_connection()
    cluster_row = get_cluster_row(database_connection)
    node_rows = get_node_rows(database_connection, cluster_row.id)
    database_connection.close()
    self._key_generator = generate_key()

    self._event_publisher_processes = list()
    self._data_writer_processes = list()
    self._data_reader_processes = list()
    self._handoff_server_processes = list()

    for i in xrange(_node_count):
        node_name = _generate_node_name(i)
        repository_path = _repository_path(node_name)
        os.makedirs(repository_path)

        process = start_event_publisher(
            node_name,
            _event_publisher_pull_addresses[i],
            _event_publisher_pub_addresses[i]
        )
        # poll_process returns None while the process is still running
        poll_result = poll_process(process)
        self.assertEqual(poll_result, None)
        self._event_publisher_processes.append(process)
        time.sleep(1.0)

        process = start_data_writer(
            _cluster_name,
            node_name,
            _data_writer_addresses[i],
            _event_publisher_pull_addresses[i],
            repository_path
        )
        poll_result = poll_process(process)
        self.assertEqual(poll_result, None)
        self._data_writer_processes.append(process)
        time.sleep(1.0)

        process = start_data_reader(
            node_name,
            _data_reader_addresses[i],
            _event_publisher_pull_addresses[i],
            repository_path
        )
        poll_result = poll_process(process)
        self.assertEqual(poll_result, None)
        self._data_reader_processes.append(process)
        time.sleep(1.0)

        process = start_handoff_server(
            _cluster_name,
            node_name,
            _handoff_server_addresses,
            _handoff_server_pipeline_addresses[i],
            _data_reader_addresses,
            _data_writer_addresses,
            _event_publisher_pull_addresses[i],
            _repository_path(node_name)
        )
        poll_result = poll_process(process)
        self.assertEqual(poll_result, None)
        self._handoff_server_processes.append(process)
        time.sleep(1.0)

    self._context = zmq.context.Context()

    self._pollster = GreenletZeroMQPollster()
    self._deliverator = Deliverator()

    self._pull_server = GreenletPULLServer(
        self._context,
        _client_address,
        self._deliverator
    )
    self._pull_server.register(self._pollster)

    # pick two nodes (excluding the first) to act as handoff backups
    backup_nodes = random.sample(node_rows[1:], 2)
    self._log.debug("backup nodes = %s" % (
        [n.name for n in backup_nodes], ))

    self._resilient_clients = list()
    for node_row, address in zip(node_rows, _data_writer_addresses):
        if not node_row in backup_nodes:
            continue
        resilient_client = GreenletResilientClient(
            self._context,
            self._pollster,
            node_row.name,
            address,
            _local_node_name,
            _client_address,
            self._deliverator,
        )
        self._resilient_clients.append(resilient_client)
    self._log.debug("%s resilient clients" % (
        len(self._resilient_clients), ))

    self._data_writer_handoff_client = DataWriterHandoffClient(
        node_rows[0].name,
        self._resilient_clients
    )

    self._pollster.start()
def _setup(_halt_event, state):
    """Initialize the anti-entropy server.

    Opens central and local database connections, binds the local
    resilient server and pull server, creates a resilient client to
    every node's anti-entropy server, and returns the
    (callback, next_run_time) pairs for the timer-driven loop.
    """
    log = logging.getLogger("_setup")
    # (client.run, start-time) pairs, staggered so status checks do not
    # all fire at once
    status_checkers = list()

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "anti_entropy_server"
    )

    state["central-database-connection"] = get_central_connection()
    state["local-database-connection"] = get_node_local_connection()

    state["cluster-row"] = get_cluster_row(
        state["central-database-connection"]
    )

    # find this node's own anti-entropy server address
    local_anti_entropy_server_address = None
    for node_name, address in zip(_node_names,
                                  _anti_entropy_server_addresses):
        if node_name == _local_node_name:
            local_anti_entropy_server_address = address
            break
    assert local_anti_entropy_server_address is not None

    log.info("binding resilient-server to %s" % (
        local_anti_entropy_server_address, ))
    state["resilient-server"] = ResilientServer(
        state["zmq-context"],
        local_anti_entropy_server_address,
        state["receive-queue"]
    )
    state["resilient-server"].register(state["pollster"])

    log.info("binding pull-server to %s" % (
        _anti_entropy_server_pipeline_address, ))
    state["pull-server"] = PULLServer(
        state["zmq-context"],
        _anti_entropy_server_pipeline_address,
        state["receive-queue"]
    )
    state["pull-server"].register(state["pollster"])

    # a resilient client for every node's anti-entropy server
    # (including this one)
    state["anti-entropy-clients"] = list()
    for node_name, anti_entropy_server_address in zip(
        _node_names, _anti_entropy_server_addresses
    ):
        resilient_client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_name,
            anti_entropy_server_address,
            _client_tag,
            _anti_entropy_server_pipeline_address
        )
        state["anti-entropy-clients"].append(resilient_client)
        status_checkers.append(
            (resilient_client.run, time.time() + random.random() * 60.0, )
        )

    state["queue-dispatcher"] = DequeDispatcher(
        state,
        state["receive-queue"],
        _dispatch_table
    )
    state["collection-list-requestor"] = CollectionListRequestor(state)
    state["consistency-check-starter"] = ConsistencyCheckStarter(
        state, _start_consistency_check
    )
    state["retry-manager"] = RetryManager(
        state, _start_consistency_check
    )
    state["state-cleaner"] = StateCleaner(state)

    state["event-push-client"].info(
        "program-start", "anti_entropy_server starts"
    )

    # start the collection list requestor right away
    # start the consistency check starter a little later, when
    # we presumably have some collection ids
    timer_driven_callbacks = [
        (state["pollster"].run, time.time(), ),
        (state["queue-dispatcher"].run, time.time(), ),
        (state["collection-list-requestor"].run, time.time(), ),
        (state["consistency-check-starter"].run, time.time()+60.0, ),
        (state["retry-manager"].run, state["retry-manager"].next_run(), ),
        (state["state-cleaner"].run, state["state-cleaner"].next_run(), ),
    ]
    timer_driven_callbacks.extend(status_checkers)
    return timer_driven_callbacks
def _setup(state):
    """Initialize the threaded data_writer service.

    Binds the resilient, reply-pull, and anti-entropy servers, connects
    the event-aggregator sub client, loads cluster/node rows from the
    central database, then starts the writer and sync worker threads.
    """
    log = logging.getLogger("_setup")

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "data_writer"
    )

    log.info("binding resilient-server to {0}".format(_data_writer_address))
    state["resilient-server"] = ResilientServer(
        state["zmq-context"],
        _data_writer_address,
        state["message-queue"]
    )
    state["resilient-server"].register(state["pollster"])

    # replies from the writer thread are forwarded back through the
    # resilient server
    log.info("binding reply-pull-server to {0}".format(
        _writer_thread_reply_address))
    state["reply-pull-server"] = ReplyPULLServer(
        state["zmq-context"],
        _writer_thread_reply_address,
        state["resilient-server"].send_reply
    )
    state["reply-pull-server"].register(state["pollster"])

    log.info("binding anti-entropy-server to {0}".format(
        _data_writer_anti_entropy_address))
    state["anti-entropy-server"] = REPServer(
        state["zmq-context"],
        _data_writer_anti_entropy_address,
        state["message-queue"]
    )
    state["anti-entropy-server"].register(state["pollster"])

    topics = ["web-writer-start", ]
    log.info("connecting sub-client to {0} subscribing to {1}".format(
        _event_aggregator_pub_address,
        topics))
    state["sub-client"] = SUBClient(
        state["zmq-context"],
        _event_aggregator_pub_address,
        topics,
        state["message-queue"]
    )
    state["sub-client"].register(state["pollster"])

    # the central connection is only needed during startup
    central_connection = get_central_connection()
    state["cluster-row"] = get_cluster_row(central_connection)
    state["node-rows"] = get_node_rows(
        central_connection, state["cluster-row"].id
    )
    central_connection.close()

    # map node name -> node id for the writer thread
    state["node-id-dict"] = dict(
        [(node_row.name, node_row.id, ) for node_row in state["node-rows"]]
    )

    state["event-push-client"].info("program-start", "data_writer starts")

    state["reply-push-client"] = PUSHClient(state["zmq-context"],
                                            _writer_thread_reply_address)

    state["writer-thread"] = WriterThread(state["halt-event"],
                                          state["node-id-dict"],
                                          state["message-queue"],
                                          state["reply-push-client"])
    state["writer-thread"].start()

    state["sync-thread"] = SyncThread(state["halt-event"],
                                      state["message-queue"])
    state["sync-thread"].start()