def _get_shard_id(central_connection, cluster_id):
    """
    Return the id of the node row whose name equals the local node name;
    that node id is used as the shard id.

    central_connection
        connection handed to get_node_rows
    cluster_id
        id of the cluster whose node rows are searched

    Raises ValueError when none of the cluster's node rows has the local
    node name.
    """
    for candidate in get_node_rows(central_connection, cluster_id):
        if candidate.name != _local_node_name:
            continue
        return candidate.id

    # no row matched the local node name: this cluster is misconfigured
    message = "node name {0} is not in node rows for cluster {1}".format(
        _local_node_name, cluster_id
    )
    raise ValueError(message)
def _setup(_halt_event, state):
    """
    Create the data writer's runtime objects and store them in ``state``.

    _halt_event
        not used by this function
    state
        dict of shared program state; "zmq-context", "receive-queue" and
        "pollster" are read here, so they must already be present

    Returns a list of ``(callable, start_time)`` tuples for the pollster,
    the queue dispatcher and the stats reporter.
    """
    log = logging.getLogger("_setup")

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"], "data_writer"
    )

    log.info("binding resilient-server to %s", _data_writer_address)
    state["resilient-server"] = ResilientServer(
        state["zmq-context"],
        _data_writer_address,
        state["receive-queue"]
    )
    state["resilient-server"].register(state["pollster"])

    state["queue-dispatcher"] = DequeDispatcher(
        state,
        state["receive-queue"],
        _dispatch_table
    )

    # close the central connection even when a row lookup raises,
    # so a failed setup does not leak it
    central_connection = get_central_connection()
    try:
        state["cluster-row"] = get_cluster_row(central_connection)
        state["node-rows"] = get_node_rows(
            central_connection, state["cluster-row"].id
        )
    finally:
        central_connection.close()

    state["node-id-dict"] = dict(
        (node_row.name, node_row.id) for node_row in state["node-rows"]
    )

    state["database-connection"] = get_node_local_connection()

    # Ticket #1646 mark output value files as closed at startup
    mark_value_files_as_closed(state["database-connection"])

    state["writer"] = Writer(
        state["database-connection"], _repository_path
    )

    state["stats-reporter"] = StatsReporter(state)

    state["event-push-client"].info("program-start", "data_writer starts")

    return [
        (state["pollster"].run, time.time()),
        (state["queue-dispatcher"].run, time.time()),
        (state["stats-reporter"].run, state["stats-reporter"].next_run()),
    ]
def _setup(_halt_event, state):
    """
    Create the handoff server's runtime objects and store them in ``state``.

    _halt_event
        not used by this function
    state
        dict of shared program state; "zmq-context", "receive-queue",
        "pollster", "handoff-server-clients", "reader-client-dict" and
        "writer-client-dict" are read here, so they must already be present

    Returns a list of ``(callable, start_time)`` tuples: the handoff
    starter, the pollster, the queue dispatcher, the handoff requestor,
    and one entry per resilient client created here.
    """
    log = logging.getLogger("_setup")
    status_checkers = list()

    def _start_client(node_name, address):
        """create a ResilientClient and schedule its first run"""
        client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_name,
            address,
            _client_tag,
            _handoff_server_pipeline_address
        )
        # don't run all the status checkers at the same time
        status_checkers.append(
            (client.run, time.time() + random.random() * 60.0, )
        )
        return client

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "handoff_server"
    )

    # close the central connection even when a row lookup raises,
    # so a failed setup does not leak it
    central_connection = get_central_connection()
    try:
        state["cluster-row"] = get_cluster_row(central_connection)
        state["node-rows"] = get_node_rows(
            central_connection, state["cluster-row"].id
        )
    finally:
        central_connection.close()

    state["node-id-dict"] = dict(
        (node_row.name, node_row.id) for node_row in state["node-rows"]
    )
    state["node-name-dict"] = dict(
        (node_row.id, node_row.name) for node_row in state["node-rows"]
    )

    state["database-connection"] = get_node_local_connection()

    # the local node gets a server bound to its own address;
    # every other node gets a client pointed at that node's address
    for node_row, handoff_server_address in zip(
        state["node-rows"], _handoff_server_addresses
    ):
        if node_row.name == _local_node_name:
            log.info(
                "binding resilient-server to %s", handoff_server_address
            )
            state["resilient-server"] = ResilientServer(
                state["zmq-context"],
                handoff_server_address,
                state["receive-queue"]
            )
            state["resilient-server"].register(state["pollster"])
        else:
            state["handoff-server-clients"].append(
                _start_client(node_row.name, handoff_server_address)
            )

    log.info(
        "binding pull-server to %s", _handoff_server_pipeline_address
    )
    state["pull-server"] = PULLServer(
        state["zmq-context"],
        _handoff_server_pipeline_address,
        state["receive-queue"]
    )
    state["pull-server"].register(state["pollster"])

    # reader and writer clients are created for every node row,
    # keyed by the name the client itself reports
    for node_row, data_reader_address in zip(
        state["node-rows"], _data_reader_addresses
    ):
        data_reader_client = _start_client(
            node_row.name, data_reader_address
        )
        state["reader-client-dict"][data_reader_client.server_node_name] = \
            data_reader_client

    for node_row, data_writer_address in zip(
        state["node-rows"], _data_writer_addresses
    ):
        data_writer_client = _start_client(
            node_row.name, data_writer_address
        )
        state["writer-client-dict"][data_writer_client.server_node_name] = \
            data_writer_client

    state["queue-dispatcher"] = DequeDispatcher(
        state,
        state["receive-queue"],
        _dispatch_table
    )

    state["handoff-requestor"] = HandoffRequestor(state, _local_node_name)
    state["handoff-starter"] = HandoffStarter(
        state, _local_node_name, state["event-push-client"]
    )

    state["event-push-client"].info(
        "program-start", "handoff_server starts"
    )

    timer_driven_callbacks = [
        (state["handoff-starter"].run, state["handoff-starter"].next_run(), ),
        (state["pollster"].run, time.time(), ),
        (state["queue-dispatcher"].run, time.time(), ),
        # try to spread out handoff polling, if all nodes start together
        (state["handoff-requestor"].run,
            time.time() + random.random() * handoff_polling_interval)
    ]
    timer_driven_callbacks.extend(status_checkers)
    return timer_driven_callbacks
def setUp(self):
    """
    Start the helper processes for every simulated node, then build the
    zeromq client objects the tests talk through.

    For each of the _node_count nodes an event publisher, a data writer,
    a data reader and a handoff server are started, in that order. Two
    node rows other than the first are picked at random as backup nodes,
    and a resilient client is created for each of them.
    """
    if not hasattr(self, "_log"):
        self._log = logging.getLogger("TestHandoffServer")

    # tearDown is run first
    # NOTE(review): presumably to clear leftovers from an earlier run
    self.tearDown()

    # close the connection even when a row lookup raises
    database_connection = get_central_connection()
    try:
        cluster_row = get_cluster_row(database_connection)
        node_rows = get_node_rows(database_connection, cluster_row.id)
    finally:
        database_connection.close()

    self._key_generator = generate_key()

    self._event_publisher_processes = list()
    self._data_writer_processes = list()
    self._data_reader_processes = list()
    self._handoff_server_processes = list()

    def _track(process, started_processes):
        """check the poll result, remember the process, then pause"""
        # NOTE(review): presumably None means the process is still running
        poll_result = poll_process(process)
        self.assertEqual(poll_result, None)
        started_processes.append(process)
        time.sleep(1.0)

    for i in xrange(_node_count):
        node_name = _generate_node_name(i)
        repository_path = _repository_path(node_name)
        os.makedirs(repository_path)

        process = start_event_publisher(
            node_name,
            _event_publisher_pull_addresses[i],
            _event_publisher_pub_addresses[i]
        )
        _track(process, self._event_publisher_processes)

        process = start_data_writer(
            _cluster_name,
            node_name,
            _data_writer_addresses[i],
            _event_publisher_pull_addresses[i],
            repository_path
        )
        _track(process, self._data_writer_processes)

        process = start_data_reader(
            node_name,
            _data_reader_addresses[i],
            _event_publisher_pull_addresses[i],
            repository_path
        )
        _track(process, self._data_reader_processes)

        process = start_handoff_server(
            _cluster_name,
            node_name,
            _handoff_server_addresses,
            _handoff_server_pipeline_addresses[i],
            _data_reader_addresses,
            _data_writer_addresses,
            _event_publisher_pull_addresses[i],
            _repository_path(node_name)
        )
        _track(process, self._handoff_server_processes)

    self._context = zmq.context.Context()
    self._pollster = GreenletZeroMQPollster()
    self._deliverator = Deliverator()

    self._pull_server = GreenletPULLServer(
        self._context,
        _client_address,
        self._deliverator
    )
    self._pull_server.register(self._pollster)

    # the first node row is never a backup; choose two of the others
    backup_nodes = random.sample(node_rows[1:], 2)
    self._log.debug("backup nodes = %s" % (
        [n.name for n in backup_nodes],
    ))

    self._resilient_clients = list()
    for node_row, address in zip(node_rows, _data_writer_addresses):
        if node_row not in backup_nodes:
            continue
        resilient_client = GreenletResilientClient(
            self._context,
            self._pollster,
            node_row.name,
            address,
            _local_node_name,
            _client_address,
            self._deliverator,
        )
        self._resilient_clients.append(resilient_client)
    self._log.debug("%s resilient clients" % (
        len(self._resilient_clients),
    ))

    self._data_writer_handoff_client = DataWriterHandoffClient(
        node_rows[0].name,
        self._resilient_clients
    )

    self._pollster.start()