def setUp(self):
     """
     run tearDown, then open a central database connection and, inside
     one transaction, purge the customer _test_username, create it
     again and add a key to it
     """
     self.tearDown()

     connection = get_central_connection()
     self._connection = connection

     connection.begin_transaction()
     # same order as always: purge, create, add key
     for customer_step in (purge_customer,
                           create_customer,
                           add_key_to_customer):
         customer_step(connection, _test_username)
     connection.commit()
Exemple #2
0
 def setUp(self):
     """
     prepare the test customer: call tearDown, connect to the central
     database, then purge / create / add a key for _test_username in a
     single transaction that is committed at the end
     """
     self.tearDown()
     self._connection = get_central_connection()

     conn, username = self._connection, _test_username

     conn.begin_transaction()
     purge_customer(conn, username)
     create_customer(conn, username)
     add_key_to_customer(conn, username)
     conn.commit()
Exemple #3
0
def _load_collection_id_queue(collection_id_queue):
    """
    load every known collection into the queue

    collection_id_queue
        object with an ``append`` method; one id is appended per row of
        nimbusio_central.collection, in ascending id order

    The central connection is closed even when the query raises.
    """
    connection = get_central_connection()
    try:
        rows = connection.fetch_all_rows(
            "select id from nimbusio_central.collection order by id")
    finally:
        # don't leak the connection if the query fails
        connection.close()

    for (collection_id, ) in rows:
        collection_id_queue.append(collection_id)
Exemple #4
0
def _load_collection_id_queue(collection_id_queue):
    """
    load every known collection into the queue

    collection_id_queue
        object with an ``append`` method; it receives the id of every
        row in nimbusio_central.collection, ordered by id

    The connection to the central database is released in a ``finally``
    clause, so a failing query does not leave it open.
    """
    connection = get_central_connection()
    try:
        rows = connection.fetch_all_rows(
            "select id from nimbusio_central.collection order by id"
        )
    finally:
        connection.close()

    for (collection_id, ) in rows:
        collection_id_queue.append(collection_id)
def delete_all_motoboto_test_segments():
    """
    Run the motoboto test-collection cleanup statements against the
    node-local database.

    The collection id rows are fetched from the central database with
    ``_test_collections_query`` and inserted into the temp table
    ``tmp_motoboto_collection_ids`` on the node-local connection. Every
    statement in ``_delete_test_collections_data`` is then executed in
    the same transaction, and a line is printed for each statement that
    reports a non-zero row count.

    Both connections are closed before returning. If a statement raises,
    the exception propagates, commit is never called and the connections
    are still closed.
    """
    central_conn = get_central_connection()
    try:
        collection_id_rows = central_conn.fetch_all_rows(
            _test_collections_query, [])
    finally:
        central_conn.close()

    local_conn = get_node_local_connection()
    try:
        local_conn.begin_transaction()
        local_conn.execute(
            "create temp table tmp_motoboto_collection_ids (id int4 not null)",
            [])
        for row in collection_id_rows:
            # each row is passed as the parameter sequence for the insert
            local_conn.execute(
                "insert into tmp_motoboto_collection_ids values (%s)", row)

        for query in _delete_test_collections_data:
            rowcount = local_conn.execute(query, [])
            if rowcount:
                # single parenthesized argument: same output from the
                # print statement and from the print function.
                # Only the first line of the query is shown.
                print("Deleted %s via %s" % (rowcount,
                                             query.split("\n", 1)[0]))

        local_conn.commit()
    finally:
        # the original never closed this connection
        local_conn.close()
def main():
    """
    main entry point

    Look up the handler for options.command in _dispatch_table and call
    it with the central database connection and the parsed options.
    Commit when the handler returns, roll back and re-raise when it
    raises Exception; the connection is closed in every case.

    return 0 (exit code)
    """
    options = _parse_command_line()
    connection = get_central_connection()

    # NOTE(review): no begin_transaction() is called here -- confirm that
    # commit/rollback act on an open transaction for this connection.
    try:
        try:
            handler = _dispatch_table[options.command]
            handler(connection, options)
        except Exception:
            connection.rollback()
            raise
        # only reached when the handler did not raise
        connection.commit()
    finally:
        connection.close()

    return 0
Exemple #7
0
def main():
    """
    main entry point

    Run the handler that _dispatch_table holds for options.command
    inside a transaction on the central database connection:
    commit when the handler returns, roll back and re-raise when it
    raises Exception.

    The connection is always closed, including when starting the
    transaction fails.

    return 0 for success (exit code)
    """
    options = _parse_command_line()
    connection = get_central_connection()

    try:
        # kept under the outer try so that a failure to begin the
        # transaction still closes the connection
        connection.begin_transaction()
        try:
            handler = _dispatch_table[options.command]
            handler(connection, options)
        except Exception:
            connection.rollback()
            raise
        # only reached when the handler did not raise
        connection.commit()
    finally:
        connection.close()

    return 0
Exemple #8
0
def get_versioned_collections():
    """
    return the set of all collection ids for which versioning is true

    The ids are the single column of the rows that ``_query`` returns
    from the central database; the connection is closed afterwards,
    whether or not the query succeeded.
    """
    log = logging.getLogger("get_versioned_collections")

    connection = get_central_connection()
    try:
        rows = connection.fetch_all_rows(_query, [])
        # every row must be a 1-tuple, exactly as in a for-loop unpack
        versioned_collections = set(
            collection_id for (collection_id, ) in rows
        )
    finally:
        connection.close()

    message = "found {0} versioned collectons".format(
        len(versioned_collections))
    log.info(message)

    return versioned_collections
def get_versioned_collections():
    """
    return the set of all collection ids for which versioning is true

    Runs ``_query`` on a central database connection, collects the one
    value of each returned row into a set and logs how many were found.
    The connection is always closed.
    """
    log = logging.getLogger("get_versioned_collections")
    found_ids = set()

    connection = get_central_connection()
    try:
        found_ids.update(
            collection_id
            for (collection_id, ) in connection.fetch_all_rows(_query, [])
        )
    finally:
        connection.close()

    log.info("found {0} versioned collectons".format(len(found_ids)))

    return found_ids
Exemple #10
0
def delete_all_motoboto_test_segments():
    """
    Execute the motoboto test-collection cleanup on the node-local
    database.

    Steps:
      1. fetch the collection id rows from the central database using
         ``_test_collections_query``
      2. in one node-local transaction, create the temp table
         ``tmp_motoboto_collection_ids`` and insert one row per id row
      3. execute every statement of ``_delete_test_collections_data``,
         printing the row count for statements that report one
      4. commit

    Each connection is closed in a ``finally`` clause, so neither is
    left open when a statement raises (in which case nothing is
    committed by this function).
    """
    central_conn = get_central_connection()
    try:
        collection_id_rows = central_conn.fetch_all_rows(
            _test_collections_query, [])
    finally:
        central_conn.close()

    local_conn = get_node_local_connection()
    try:
        local_conn.begin_transaction()
        local_conn.execute(
            "create temp table tmp_motoboto_collection_ids (id int4 not null)",
            [])
        for row in collection_id_rows:
            local_conn.execute(
                "insert into tmp_motoboto_collection_ids values (%s)", row)

        for query in _delete_test_collections_data:
            rowcount = local_conn.execute(query, [])
            if not rowcount:
                continue
            # report only the first line of the statement; the single
            # parenthesized argument prints identically under the print
            # statement and the print function
            first_line = query.split("\n", 1)[0]
            print("Deleted %s via %s" % (rowcount, first_line))

        local_conn.commit()
    finally:
        # previously this connection was never closed
        local_conn.close()
def _setup(_halt_event, state):
    """
    Populate ``state`` for the data writer and return the initial list
    of callbacks.

    _halt_event
        not used by this function
    state
        dict shared by the program; "zmq-context", "pollster" and
        "receive-queue" are read, the other keys used below are set here

    Returns a list of (callable, time) tuples for the pollster, the
    queue dispatcher and the stats reporter.

    The central database connection is only needed to load the cluster
    and node rows; it is closed even when one of those lookups raises.
    """
    log = logging.getLogger("_setup")

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"], "data_writer"
    )

    log.info("binding resilient-server to %s" % (_data_writer_address, ))
    state["resilient-server"] = ResilientServer(
        state["zmq-context"], _data_writer_address, state["receive-queue"]
    )
    state["resilient-server"].register(state["pollster"])

    state["queue-dispatcher"] = DequeDispatcher(
        state, state["receive-queue"], _dispatch_table
    )

    central_connection = get_central_connection()
    try:
        state["cluster-row"] = get_cluster_row(central_connection)
        state["node-rows"] = get_node_rows(
            central_connection, state["cluster-row"].id
        )
    finally:
        # don't leak the connection when a lookup fails
        central_connection.close()

    # node name -> node id
    state["node-id-dict"] = dict(
        (node_row.name, node_row.id) for node_row in state["node-rows"]
    )

    state["database-connection"] = get_node_local_connection()

    # Ticket #1646 mark output value files as closed at startup
    mark_value_files_as_closed(state["database-connection"])

    state["writer"] = Writer(state["database-connection"], _repository_path)

    state["stats-reporter"] = StatsReporter(state)

    state["event-push-client"].info("program-start", "data_writer starts")

    return [
        (state["pollster"].run, time.time()),
        (state["queue-dispatcher"].run, time.time()),
        (state["stats-reporter"].run, state["stats-reporter"].next_run()),
    ]
Exemple #12
0
 def __init__(self, transaction=True):
     """
     connect to the central database; unless transaction is false,
     issue "BEGIN;" on the new connection right away
     """
     self._log = logging.getLogger("SpaceAccountingDatabase")

     connection = get_central_connection()
     self._connection = connection

     if not transaction:
         return
     connection.execute("BEGIN;")
Exemple #13
0
def main():
    """
    main entry point

    While holding the "redis_stats_collector" advisory lock on the
    central database: process every node in _node_names, insert the
    collected accounting rows and dedupe rows in one transaction, then
    remove the processed keys for each node.

    An Exception raised along the way is logged, reported through the
    event push client and turned into a return code of 1. After the try
    block the database connection (if one was opened), the event push
    client and the zeromq context are closed.

    return 0 for success (exit code)
    """
    initialize_logging(_log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context =  zmq.Context()

    event_push_client = EventPushClient(zeromq_context, 
                                        "redis_stats_collector")
    event_push_client.info("program-start", "flush_stats_from_redis starts")  

    # don't flush anything newer than 1 minute ago
    current_time = datetime.utcnow()
    timestamp_cutoff = current_time - timedelta(minutes=1)

    return_code = 0
    # stays None when the connection attempt itself fails, so the
    # cleanup below knows there is nothing to close
    central_db_connection = None

    # filled by _process_one_node, inserted into the central database below
    collection_ops_accounting_rows = list()

    # values to be added to the dedupe table
    new_dedupes = list()

    # keys to be deleted (a list for each node, in _node_names order)
    node_keys_processed = [list() for _ in _node_names]

    try:
        central_db_connection = get_central_connection()

        # On startup, the program connects to the central database and tries 
        # to acquire a pg_advisory_lock appropriate for this program and the 
        # data center it is running in using the pg_try_advisory_lock function.
        # If it cannot acquire the lock, it notes the status of the lock 
        # and exits. This central locking mechanism lets us avoid single points
        # of failure by configuring the program to run on multiple nodes.

        with advisory_lock(central_db_connection, "redis_stats_collector"):
            node_dict = _retrieve_node_dict(central_db_connection)
            for node_name, keys_processed in \
                zip(_node_names, node_keys_processed):
                node_id = node_dict[node_name]
                log.debug("processing node {0} node_id={1}".format(node_name,
                                                                  node_id))

                # The program then selects into memory all recently collected 
                # keys from the central database table 
                # collection_ops_accounting_flush_dedupe and stores them in a 
                # dedupe set. This set allows runs of the collection/flush 
                # program to be idempotent across some time period (
                # but we won't keep the list of old keys forever.) 

                dedupe_set = _retrieve_dedupe_set(central_db_connection, 
                                                  node_id)

                # The program then visits the Redis instance on every storage 
                # node in the local data center, collecting the data from all 
                # past stats keys -- aggregating it into the program's memory.  
                # The aggregation should involve buckets for each 
                # storage_node_id and redis key, corresponding to the columns 
                # in the database.
                # (node_dict[node_name] is the same lookup as node_id above)
                _process_one_node(node_name,
                                  node_dict[node_name],
                                  timestamp_cutoff,
                                  dedupe_set,
                                  collection_ops_accounting_rows,
                                  new_dedupes,
                                  keys_processed)

            # After collecting past keys from every storage node, 
            # inside a central database transaction:
            # 1. Insert the collected stats into the central database 
            #    collection_ops_accounting
            # 2. Insert collected keys into recently collected keys 
            #    collection_ops_accounting_flush_dedupe.
            # 3. commit transaction
            log.debug("updating central database")
            central_db_connection.begin_transaction()
            try:
                _insert_accounting_rows(central_db_connection,
                                        collection_ops_accounting_rows)
                _insert_dedupe_rows(central_db_connection, 
                                    timestamp_cutoff, 
                                    new_dedupes)
            except Exception:
                # undo both inserts, then let the outer handler report it
                central_db_connection.rollback()
                raise
            else:
                central_db_connection.commit()

            # Then revisit the Redis nodes, and delete the keys we flushed 
            # into the database, and any keys we skipped because they were 
            # found in the dedupe set.
            # This runs only after the commit above succeeded.
            for node_name, keys_processed in zip(_node_names, 
                                                 node_keys_processed):
                _remove_processed_keys(node_name, keys_processed)

    except Exception as instance:
        # top level boundary: log, publish an exception event and
        # report failure through the exit code instead of re-raising
        log.exception("Uhandled exception {0}".format(instance))
        event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__
        )
        return_code = 1

    # cleanup, reached on success and after a handled Exception
    if central_db_connection is not None:
        central_db_connection.close()

    event_push_client.close()
    zeromq_context.term()

    log.info("program terminates return_code = {0}".format(return_code))
    return return_code
 def __init__(self, transaction=True):
     """
     open a connection to the central database

     transaction
         when true (the default) "BEGIN;" is executed on the
         connection as soon as it is open
     """
     self._log = logging.getLogger("SpaceAccountingDatabase")
     self._connection = get_central_connection()

     start_transaction = bool(transaction)
     if start_transaction:
         self._connection.execute("BEGIN;")
    def __init__(self):
        """
        Build everything the web internal reader uses: the database
        connections, the zeromq PULL server, one resilient client and
        one DataReader per node, the space accounting clients, the event
        push client, the watcher, and finally the WSGI application and
        the WSGI server that serves it.
        """
        self._log = logging.getLogger("WebInternalReader")

        memcached_client = memcache.Client(_memcached_nodes)

        self._central_connection = get_central_connection()
        self._cluster_row = get_cluster_row(self._central_connection)
        self._node_local_connection = get_node_local_connection()
        self._deliverator = Deliverator()

        # one zeromq context shared by every client and server below
        self._zeromq_context = zmq.Context()

        self._pull_server = GreenletPULLServer(
            self._zeromq_context, 
            _web_internal_reader_pipeline_address,
            self._deliverator
        )
        self._pull_server.link_exception(self._unhandled_greenlet_exception)

        # one resilient client (and a DataReader wrapping it) per node;
        # _node_names and _data_reader_addresses are paired by position
        self._data_reader_clients = list()
        self._data_readers = list()
        for node_name, address in zip(_node_names, _data_reader_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_internal_reader_pipeline_address,
                self._deliverator,
                connect_messages=[]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_reader_clients.append(resilient_client)
            data_reader = DataReader(
                node_name, resilient_client
            )
            self._data_readers.append(data_reader)

        self._space_accounting_dealer_client = GreenletDealerClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_server_address
        )
        self._space_accounting_dealer_client.link_exception(
            self._unhandled_greenlet_exception
        )

        # only handed to the SpaceAccountingClient, not kept on self
        push_client = GreenletPUSHClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_pipeline_address,
        )

        self._accounting_client = SpaceAccountingClient(
            _local_node_name,
            self._space_accounting_dealer_client,
            push_client
        )

        self._event_push_client = EventPushClient(
            self._zeromq_context,
            "web-internal-reader"
        )

        # event announcing that this server is (re)starting; per the
        # original note this invalidates any archives or retrieves
        # that are in progress for this node
        timestamp = create_timestamp()
        self._event_push_client.info("web-reader-start",
                                     "web reader (re)start",
                                     timestamp_repr=repr(timestamp),
                                     source_node_name=_local_node_name)

        self._watcher = Watcher(
            _stats, 
            self._data_reader_clients,
            self._event_push_client
        )

        self.application = Application(
            memcached_client,
            self._central_connection,
            self._node_local_connection,
            self._cluster_row,
            self._data_readers,
            self._accounting_client,
            self._event_push_client,
            _stats
        )
        self.wsgi_server = WSGIServer(
            (_web_internal_reader_host, _web_internal_reader_port), 
            application=self.application,
            backlog=_wsgi_backlog
        )
Exemple #16
0
    def __init__(self):
        """
        Wire up the web internal reader: database connections, zeromq
        PULL server, a resilient client plus DataReader for every node,
        space accounting clients, event push client, watcher, and the
        WSGI application together with the server that hosts it.
        """
        self._log = logging.getLogger("WebInternalReader")

        memcached_client = memcache.Client(_memcached_nodes)

        self._central_connection = get_central_connection()
        self._cluster_row = get_cluster_row(self._central_connection)
        self._node_local_connection = get_node_local_connection()
        self._deliverator = Deliverator()

        # shared by all zeromq clients and servers created below
        self._zeromq_context = zmq.Context()

        self._pull_server = GreenletPULLServer(
            self._zeromq_context, _web_internal_reader_pipeline_address,
            self._deliverator)
        self._pull_server.link_exception(self._unhandled_greenlet_exception)

        # node names and data reader addresses are matched by position
        self._data_reader_clients = list()
        self._data_readers = list()
        for node_name, address in zip(_node_names, _data_reader_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context,
                node_name,
                address,
                _client_tag,
                _web_internal_reader_pipeline_address,
                self._deliverator,
                connect_messages=[])
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_reader_clients.append(resilient_client)
            data_reader = DataReader(node_name, resilient_client)
            self._data_readers.append(data_reader)

        self._space_accounting_dealer_client = GreenletDealerClient(
            self._zeromq_context, _local_node_name,
            _space_accounting_server_address)
        self._space_accounting_dealer_client.link_exception(
            self._unhandled_greenlet_exception)

        # local only: passed on to the SpaceAccountingClient
        push_client = GreenletPUSHClient(
            self._zeromq_context,
            _local_node_name,
            _space_accounting_pipeline_address,
        )

        self._accounting_client = SpaceAccountingClient(
            _local_node_name, self._space_accounting_dealer_client,
            push_client)

        self._event_push_client = EventPushClient(self._zeromq_context,
                                                  "web-internal-reader")

        # event announcing that this server is (re)starting; according
        # to the original note this invalidates any archives or
        # retrieves that are in progress for this node
        timestamp = create_timestamp()
        self._event_push_client.info("web-reader-start",
                                     "web reader (re)start",
                                     timestamp_repr=repr(timestamp),
                                     source_node_name=_local_node_name)

        self._watcher = Watcher(_stats, self._data_reader_clients,
                                self._event_push_client)

        self.application = Application(memcached_client,
                                       self._central_connection,
                                       self._node_local_connection,
                                       self._cluster_row, self._data_readers,
                                       self._accounting_client,
                                       self._event_push_client, _stats)
        self.wsgi_server = WSGIServer(
            (_web_internal_reader_host, _web_internal_reader_port),
            application=self.application,
            backlog=_wsgi_backlog)
def _setup(_halt_event, state):
    """
    Populate ``state`` for the handoff server and return the initial
    list of timer driven callbacks.

    _halt_event
        not used by this function
    state
        dict shared by the program. "zmq-context", "pollster" and
        "receive-queue" are read. "handoff-server-clients" (appended
        to) and "reader-client-dict" / "writer-client-dict" (items
        assigned) must already exist. The other keys used below are
        set here.

    Returns a list of (callable, time) tuples: the handoff starter,
    pollster, queue dispatcher and handoff requestor, followed by one
    entry per resilient client created here.

    The central database connection is only needed for the cluster and
    node rows; it is closed even when one of those lookups raises.
    """
    log = logging.getLogger("_setup")
    status_checkers = list()

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "handoff_server"
    )

    central_connection = get_central_connection()
    try:
        state["cluster-row"] = get_cluster_row(central_connection)
        state["node-rows"] = get_node_rows(
            central_connection, state["cluster-row"].id
        )
    finally:
        # don't leak the connection when a lookup fails
        central_connection.close()

    # lookups in both directions: name -> id and id -> name
    state["node-id-dict"] = dict(
        [(node_row.name, node_row.id, ) for node_row in state["node-rows"]]
    )
    state["node-name-dict"] = dict(
        [(node_row.id, node_row.name, ) for node_row in state["node-rows"]]
    )

    state["database-connection"] = get_node_local_connection()

    # the local node gets the server end; every other node gets a client.
    # node rows and addresses are paired by position.
    for node_row, handoff_server_address in zip(
        state["node-rows"], _handoff_server_addresses
    ):
        if node_row.name == _local_node_name:
            log.info("binding resilient-server to %s" % (
                handoff_server_address,
            ))
            state["resilient-server"] = ResilientServer(
                state["zmq-context"],
                handoff_server_address,
                state["receive-queue"]
            )
            state["resilient-server"].register(state["pollster"])
        else:
            handoff_server_client = ResilientClient(
                state["zmq-context"],
                state["pollster"],
                node_row.name,
                handoff_server_address,
                _client_tag,
                _handoff_server_pipeline_address
            )
            state["handoff-server-clients"].append(handoff_server_client)
            # don't run all the status checkers at the same time
            status_checkers.append(
                (handoff_server_client.run,
                 time.time() + random.random() * 60.0, )
            )

    log.info("binding pull-server to %s" % (_handoff_server_pipeline_address, ))
    state["pull-server"] = PULLServer(
        state["zmq-context"],
        _handoff_server_pipeline_address,
        state["receive-queue"]
    )
    state["pull-server"].register(state["pollster"])

    # one data reader client per node, keyed by the client's
    # server_node_name
    for node_row, data_reader_address in zip(
        state["node-rows"], _data_reader_addresses
    ):
        data_reader_client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_row.name,
            data_reader_address,
            _client_tag,
            _handoff_server_pipeline_address
        )
        state["reader-client-dict"][data_reader_client.server_node_name] = \
                data_reader_client
        # don't run all the status checkers at the same time
        status_checkers.append(
            (data_reader_client.run, time.time() + random.random() * 60.0, )
        )

    # one data writer client per node, keyed the same way
    for node_row, data_writer_address in zip(
        state["node-rows"], _data_writer_addresses
    ):
        data_writer_client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_row.name,
            data_writer_address,
            _client_tag,
            _handoff_server_pipeline_address
        )
        state["writer-client-dict"][data_writer_client.server_node_name] = \
                data_writer_client
        # don't run all the status checkers at the same time
        status_checkers.append(
            (data_writer_client.run, time.time() + random.random() * 60.0, )
        )

    state["queue-dispatcher"] = DequeDispatcher(
        state,
        state["receive-queue"],
        _dispatch_table
    )

    state["handoff-requestor"] = HandoffRequestor(state, _local_node_name)
    state["handoff-starter"] = HandoffStarter(
        state, _local_node_name, state["event-push-client"]
    )

    state["event-push-client"].info("program-start", "handoff_server starts")

    timer_driven_callbacks = [
        (state["handoff-starter"].run, state["handoff-starter"].next_run(), ),
        (state["pollster"].run, time.time(), ),
        (state["queue-dispatcher"].run, time.time(), ),
        # try to spread out handoff polling, if all nodes start together
        (state["handoff-requestor"].run,
            time.time() + random.random() * handoff_polling_interval)
    ]
    timer_driven_callbacks.extend(status_checkers)
    return timer_driven_callbacks
    def __init__(self):
        """
        Build everything the web server uses: authenticator, database
        connections, unified id factory, zeromq PULL server, one
        resilient client per node for the data writers and for the data
        readers (with a DataReader around each reader client), the space
        accounting clients, the event push client, the watcher, the
        internal id translator, and finally the WSGI application and
        the WSGI server that serves it.
        """
        self._log = logging.getLogger("WebServer")
        authenticator = SqlAuthenticator()

        self._central_connection = get_central_connection()
        self._cluster_row = get_cluster_row(self._central_connection)
        self._node_local_connection = get_node_local_connection()
        self._unified_id_factory = UnifiedIDFactory(
            self._central_connection,
            _get_shard_id(self._central_connection, self._cluster_row.id)
        )
        self._deliverator = Deliverator()

        # one zeromq context shared by every client and server below
        self._zeromq_context = zmq.Context()

        self._pull_server = GreenletPULLServer(
            self._zeromq_context, 
            _web_server_pipeline_address,
            self._deliverator
        )
        self._pull_server.link_exception(self._unhandled_greenlet_exception)

        # message sent to data readers and writers telling them the server
        # is (re)starting, thereby invalidating any archives or retrieves
        # that are in progress for this node.
        # It is handed to every resilient client below as its only
        # connect message.
        timestamp = create_timestamp()
        start_message = {
            "message-type"              : "web-server-start",
            "priority"                  : create_priority(),
            "unified-id"                : self._unified_id_factory.next(),
            "timestamp-repr"            : repr(timestamp),
            "source-node-name"          : _local_node_name,
        }

        # one resilient client per data writer; node names and
        # addresses are paired by position
        self._data_writer_clients = list()
        for node_name, address in zip(_node_names, _data_writer_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_server_pipeline_address,
                self._deliverator,
                connect_messages=[start_message, ]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_writer_clients.append(resilient_client)

        # one resilient client per data reader, each wrapped in a DataReader
        self._data_reader_clients = list()
        self._data_readers = list()
        for node_name, address in zip(_node_names, _data_reader_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_server_pipeline_address,
                self._deliverator,
                connect_messages=[start_message, ]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_reader_clients.append(resilient_client)
            data_reader = DataReader(
                node_name, resilient_client
            )
            self._data_readers.append(data_reader)

        self._space_accounting_dealer_client = GreenletDealerClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_server_address
        )
        self._space_accounting_dealer_client.link_exception(
            self._unhandled_greenlet_exception
        )

        # only handed to the SpaceAccountingClient, not kept on self
        push_client = GreenletPUSHClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_pipeline_address,
        )

        self._accounting_client = SpaceAccountingClient(
            _local_node_name,
            self._space_accounting_dealer_client,
            push_client
        )

        self._event_push_client = EventPushClient(
            self._zeromq_context,
            "web-server"
        )

        self._watcher = Watcher(
            _stats, 
            self._data_reader_clients,
            self._data_writer_clients,
            self._event_push_client
        )

        # the id translator keys are unpickled from a file in the
        # repository directory.
        # NOTE(review): the file is opened in text mode ("r"); pickle
        # files written with a binary protocol need "rb" -- confirm how
        # id_translator_keys.pkl is written before changing this.
        # NOTE(review): pickle.load must only be used on trusted files.
        id_translator_keys_path = os.path.join(
            _repository_path, "id_translator_keys.pkl"
        )
        with open(id_translator_keys_path, "r") as input_file:
            id_translator_keys = pickle.load(input_file)

        self._id_translator = InternalIDTranslator(
            id_translator_keys["key"],
            id_translator_keys["hmac_key"], 
            id_translator_keys["iv_key"],
            id_translator_keys["hmac_size"]
        )
        self.application = Application(
            self._central_connection,
            self._node_local_connection,
            self._cluster_row,
            self._unified_id_factory,
            self._id_translator,
            self._data_writer_clients,
            self._data_readers,
            authenticator,
            self._accounting_client,
            self._event_push_client,
            _stats
        )
        self.wsgi_server = WSGIServer(
            (_web_server_host, _web_server_port), 
            application=self.application,
            backlog=_wsgi_backlog
        )
Exemple #19
0
    def setUp(self):
        """
        Start the per-node helper processes and build the zeromq clients
        used by the test.

        After calling tearDown and reading the cluster and node rows
        from the central database, an event publisher, data writer,
        data reader and handoff server are started for each of the
        _node_count nodes. Two node rows are then sampled at random from
        all but the first as backup nodes; a resilient client is created
        for each of them and handed to a DataWriterHandoffClient named
        after the first node. The pollster is started last.
        """
        if not hasattr(self, "_log"):
            self._log = logging.getLogger("TestHandoffServer")

        self.tearDown()

        database_connection = get_central_connection()
        cluster_row = get_cluster_row(database_connection)
        node_rows = get_node_rows(database_connection, cluster_row.id)
        database_connection.close()

        self._key_generator = generate_key()

        self._event_publisher_processes = list()
        self._data_writer_processes = list()
        self._data_reader_processes = list()
        self._handoff_server_processes = list()

        def _track(process, process_list):
            # poll_process must report None for the new process
            # (presumably: still running) before it is recorded;
            # then wait a second before starting the next one
            self.assertEqual(poll_process(process), None)
            process_list.append(process)
            time.sleep(1.0)

        for i in xrange(_node_count):
            node_name = _generate_node_name(i)
            repository_path = _repository_path(node_name)
            os.makedirs(repository_path)

            _track(
                start_event_publisher(
                    node_name,
                    _event_publisher_pull_addresses[i],
                    _event_publisher_pub_addresses[i]
                ),
                self._event_publisher_processes
            )

            _track(
                start_data_writer(
                    _cluster_name,
                    node_name,
                    _data_writer_addresses[i],
                    _event_publisher_pull_addresses[i],
                    repository_path
                ),
                self._data_writer_processes
            )

            _track(
                start_data_reader(
                    node_name,
                    _data_reader_addresses[i],
                    _event_publisher_pull_addresses[i],
                    repository_path
                ),
                self._data_reader_processes
            )

            _track(
                start_handoff_server(
                    _cluster_name,
                    node_name,
                    _handoff_server_addresses,
                    _handoff_server_pipeline_addresses[i],
                    _data_reader_addresses,
                    _data_writer_addresses,
                    _event_publisher_pull_addresses[i],
                    _repository_path(node_name)
                ),
                self._handoff_server_processes
            )

        self._context = zmq.context.Context()
        self._pollster = GreenletZeroMQPollster()
        self._deliverator = Deliverator()

        self._pull_server = GreenletPULLServer(
            self._context, _client_address, self._deliverator
        )
        self._pull_server.register(self._pollster)

        # the first node row is never picked as a backup node
        backup_nodes = random.sample(node_rows[1:], 2)
        backup_node_names = [n.name for n in backup_nodes]
        self._log.debug("backup nodes = %s" % (backup_node_names, ))

        # clients only for the data writers of the backup nodes
        self._resilient_clients = list()
        for node_row, address in zip(node_rows, _data_writer_addresses):
            if node_row in backup_nodes:
                self._resilient_clients.append(
                    GreenletResilientClient(
                        self._context,
                        self._pollster,
                        node_row.name,
                        address,
                        _local_node_name,
                        _client_address,
                        self._deliverator,
                    )
                )
        client_count = len(self._resilient_clients)
        self._log.debug("%s resilient clients" % (client_count, ))

        self._data_writer_handoff_client = DataWriterHandoffClient(
            node_rows[0].name, self._resilient_clients
        )

        self._pollster.start()
    def setUp(self):
        """
        Bring up the processes and clients the handoff server test needs.

        Calls tearDown first, loads the cluster and node rows from the
        central database, then for every node index below _node_count
        starts (in this order) an event publisher, a data writer, a data
        reader and a handoff server, asserting after each start that
        poll_process returns None and sleeping one second.

        Afterwards the zeromq context, pollster, deliverator and PULL
        server are created, two backup nodes are sampled from all node
        rows but the first, a resilient client is built for each backup
        node, and the DataWriterHandoffClient for the first node is
        created before the pollster is started.
        """
        if not hasattr(self, "_log"):
            self._log = logging.getLogger("TestHandoffServer")

        self.tearDown()
        database_connection = get_central_connection()
        cluster_row = get_cluster_row(database_connection)
        node_rows = get_node_rows(database_connection, cluster_row.id)
        database_connection.close()

        self._key_generator = generate_key()

        self._event_publisher_processes = list()
        self._data_writer_processes = list()
        self._data_reader_processes = list()
        self._handoff_server_processes = list()

        for node_index in xrange(_node_count):
            node_name = _generate_node_name(node_index)
            repository_path = _repository_path(node_name)
            os.makedirs(repository_path)

            event_publisher = start_event_publisher(
                node_name,
                _event_publisher_pull_addresses[node_index],
                _event_publisher_pub_addresses[node_index])
            self.assertEqual(poll_process(event_publisher), None)
            self._event_publisher_processes.append(event_publisher)
            time.sleep(1.0)

            data_writer = start_data_writer(
                _cluster_name,
                node_name,
                _data_writer_addresses[node_index],
                _event_publisher_pull_addresses[node_index],
                repository_path)
            self.assertEqual(poll_process(data_writer), None)
            self._data_writer_processes.append(data_writer)
            time.sleep(1.0)

            data_reader = start_data_reader(
                node_name,
                _data_reader_addresses[node_index],
                _event_publisher_pull_addresses[node_index],
                repository_path)
            self.assertEqual(poll_process(data_reader), None)
            self._data_reader_processes.append(data_reader)
            time.sleep(1.0)

            handoff_server = start_handoff_server(
                _cluster_name,
                node_name,
                _handoff_server_addresses,
                _handoff_server_pipeline_addresses[node_index],
                _data_reader_addresses,
                _data_writer_addresses,
                _event_publisher_pull_addresses[node_index],
                _repository_path(node_name))
            self.assertEqual(poll_process(handoff_server), None)
            self._handoff_server_processes.append(handoff_server)
            time.sleep(1.0)

        self._context = zmq.context.Context()
        self._pollster = GreenletZeroMQPollster()
        self._deliverator = Deliverator()

        self._pull_server = GreenletPULLServer(self._context,
                                               _client_address,
                                               self._deliverator)
        self._pull_server.register(self._pollster)

        # node_rows[0] is excluded from the sample
        backup_nodes = random.sample(node_rows[1:], 2)
        self._log.debug(
            "backup nodes = %s" % ([n.name for n in backup_nodes], ))

        # keep only the (node row, data writer address) pairs that
        # belong to a backup node, in their original order
        backup_pairs = [
            (node_row, address)
            for node_row, address in zip(node_rows, _data_writer_addresses)
            if node_row in backup_nodes
        ]

        self._resilient_clients = list()
        for node_row, address in backup_pairs:
            resilient_client = GreenletResilientClient(
                self._context,
                self._pollster,
                node_row.name,
                address,
                _local_node_name,
                _client_address,
                self._deliverator,
            )
            self._resilient_clients.append(resilient_client)
        self._log.debug(
            "%s resilient clients" % (len(self._resilient_clients), ))

        primary_node_name = node_rows[0].name
        self._data_writer_handoff_client = DataWriterHandoffClient(
            primary_node_name,
            self._resilient_clients)

        self._pollster.start()
def _setup(_halt_event, state):
    """
    create the anti entropy server's runtime objects and store them in state

    _halt_event
        not used by this function

    state
        mapping of shared program state; "zmq-context", "pollster" and
        "receive-queue" must already be present

    returns a list of (callable, time) tuples: the fixed timer driven
    callbacks followed by one entry for each resilient client. The time
    is either derived from time.time() or is the result of the object's
    next_run()
    """
    log = logging.getLogger("_setup")
    zmq_context = state["zmq-context"]

    # the event push client comes first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        zmq_context, "anti_entropy_server"
    )

    central_connection = get_central_connection()
    state["central-database-connection"] = central_connection
    state["local-database-connection"] = get_node_local_connection()
    state["cluster-row"] = get_cluster_row(central_connection)

    # our own address is the first one paired with the local node name
    local_address = next(
        (
            address
            for node_name, address in zip(
                _node_names, _anti_entropy_server_addresses
            )
            if node_name == _local_node_name
        ),
        None
    )
    assert local_address is not None

    log.info("binding resilient-server to %s" % (local_address, ))
    resilient_server = ResilientServer(
        zmq_context, local_address, state["receive-queue"]
    )
    state["resilient-server"] = resilient_server
    resilient_server.register(state["pollster"])

    log.info("binding pull-server to %s" % (
        _anti_entropy_server_pipeline_address,
    ))
    pull_server = PULLServer(
        zmq_context,
        _anti_entropy_server_pipeline_address,
        state["receive-queue"]
    )
    state["pull-server"] = pull_server
    pull_server.register(state["pollster"])

    # one resilient client for every configured node; each client's run
    # callback is scheduled at a random time within the next 60 seconds
    clients = list()
    status_checkers = list()
    state["anti-entropy-clients"] = clients
    for node_name, server_address in zip(
        _node_names, _anti_entropy_server_addresses
    ):
        client = ResilientClient(
            zmq_context,
            state["pollster"],
            node_name,
            server_address,
            _client_tag,
            _anti_entropy_server_pipeline_address
        )
        clients.append(client)
        status_checkers.append(
            (client.run, time.time() + random.random() * 60.0, )
        )

    state["queue-dispatcher"] = DequeDispatcher(
        state, state["receive-queue"], _dispatch_table
    )

    state["collection-list-requestor"] = CollectionListRequestor(state)
    state["consistency-check-starter"] = ConsistencyCheckStarter(
        state, _start_consistency_check
    )
    state["retry-manager"] = RetryManager(state, _start_consistency_check)
    state["state-cleaner"] = StateCleaner(state)

    state["event-push-client"].info(
        "program-start", "anti_entropy_server starts"
    )

    # these run right away (this includes the collection list requestor)
    callbacks = [
        (state[key].run, time.time(), )
        for key in (
            "pollster", "queue-dispatcher", "collection-list-requestor",
        )
    ]

    # the consistency check starter runs a little later, when
    # we presumably have some collection ids
    callbacks.append(
        (state["consistency-check-starter"].run, time.time()+60.0, )
    )

    # these objects pick their own first run time
    for key in ("retry-manager", "state-cleaner", ):
        callbacks.append((state[key].run, state[key].next_run(), ))

    callbacks.extend(status_checkers)
    return callbacks
def _setup(state):
    """
    create the data writer's servers, clients and worker threads and
    store each one in state

    state
        mapping of shared program state; "zmq-context", "pollster",
        "message-queue" and "halt-event" must already be present

    The cluster row and node rows are read from the central database
    over a connection that is closed again before setup continues.
    The writer thread and the sync thread are started here.
    """
    log = logging.getLogger("_setup")

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "data_writer"
    )

    log.info("binding resilient-server to {0}".format(_data_writer_address))
    state["resilient-server"] = ResilientServer(
        state["zmq-context"],
        _data_writer_address,
        state["message-queue"]
    )
    state["resilient-server"].register(state["pollster"])

    # replies pulled from the writer thread's reply address are handed
    # to the resilient server's send_reply
    log.info("binding reply-pull-server to {0}".format(
        _writer_thread_reply_address))
    state["reply-pull-server"] = ReplyPULLServer(
        state["zmq-context"],
        _writer_thread_reply_address,
        state["resilient-server"].send_reply
    )
    state["reply-pull-server"].register(state["pollster"])

    log.info("binding anti-entropy-server to {0}".format(
        _data_writer_anti_entropy_address))
    state["anti-entropy-server"] = REPServer(
        state["zmq-context"],
        _data_writer_anti_entropy_address,
        state["message-queue"]
    )
    state["anti-entropy-server"].register(state["pollster"])

    topics = ["web-writer-start", ]
    log.info("connecting sub-client to {0} subscribing to {1}".format(
        _event_aggregator_pub_address,
        topics))
    state["sub-client"] = SUBClient(
        state["zmq-context"],
        _event_aggregator_pub_address,
        topics,
        state["message-queue"]
    )
    state["sub-client"].register(state["pollster"])

    central_connection = get_central_connection()
    try:
        state["cluster-row"] = get_cluster_row(central_connection)
        state["node-rows"] = get_node_rows(
            central_connection, state["cluster-row"].id
        )
    finally:
        # close the connection even when one of the queries raises,
        # so a failed setup does not leak it
        central_connection.close()

    # node name -> node id
    state["node-id-dict"] = dict(
        (node_row.name, node_row.id, ) for node_row in state["node-rows"]
    )

    state["event-push-client"].info("program-start", "data_writer starts")


    # the writer thread sends through this client to the same address
    # the reply-pull-server above is bound to
    state["reply-push-client"] = PUSHClient(state["zmq-context"],
                                            _writer_thread_reply_address)

    state["writer-thread"] = WriterThread(state["halt-event"],
                                          state["node-id-dict"],
                                          state["message-queue"],
                                          state["reply-push-client"])
    state["writer-thread"].start()

    state["sync-thread"] = SyncThread(state["halt-event"],
                                      state["message-queue"])
    state["sync-thread"].start()