def send_request_and_get_reply_and_data(
    server_node_name,
    server_address, 
    client_tag, 
    client_address, 
    request, 
    data=None
):
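    """
    Send a single request to a resilient server and return (reply, data).

    Sets up a private Deliverator, PULL server, and resilient client,
    waits for the client to connect (with a limited number of retries),
    queues the request, and blocks on the delivery channel for the reply
    before shutting everything down.
    """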
    log = logging.getLogger("send_request_and_get_reply_and_data")
    context = zmq.Context()
    deliverator = Deliverator()

    pull_server = GreenletPULLServer(
        context, 
        client_address,
        deliverator
    )
    pull_server.start()

    resilient_client = GreenletResilientClient(
        context,
        server_node_name,
        server_address,
        client_tag,
        client_address,
        deliverator,
    )
    resilient_client.start()

    # loop until the resilient client connects
    test_status_count = 0
    while True:
        status_name, _, __ = resilient_client.test_current_status()
        if status_name == "connected":
            break
        test_status_count += 1
        if test_status_count > 5:
            log.error("too many status retries")
            raise UtilError("too many status retries")

        log.warn("status retry delay")
        gevent.sleep(10.0)

    delivery_channel = resilient_client.queue_message_for_send(request, data)
    reply, data = delivery_channel.get()

    pull_server.kill()
    resilient_client.kill()
    pull_server.join()
    resilient_client.join()

    context.term()
    return reply, data
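
# A minimal usage sketch for the helper above (commented out because the
# node name, addresses, and message type shown are hypothetical
# placeholders; real values come from the cluster configuration):
#
#     reply, data = send_request_and_get_reply_and_data(
#         "node-01",                # server_node_name
#         "tcp://127.0.0.1:8100",   # server_address
#         "example-client",         # client_tag
#         "tcp://127.0.0.1:8900",   # client_address
#         {"message-type": "ping"}, # request
#     )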
Example #2
def send_request_and_get_reply_and_data(server_node_name,
                                        server_address,
                                        client_tag,
                                        client_address,
                                        request,
                                        data=None):
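    """
    Same as the helper above, but polls resilient_client.connected
    directly instead of calling test_current_status().
    """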
    log = logging.getLogger("send_request_and_get_reply_and_data")
    context = zmq.Context()
    deliverator = Deliverator()

    pull_server = GreenletPULLServer(context, client_address, deliverator)
    pull_server.start()

    resilient_client = GreenletResilientClient(
        context,
        server_node_name,
        server_address,
        client_tag,
        client_address,
        deliverator,
    )
    resilient_client.start()

    # loop until the resilient client connects
    test_status_count = 0
    while True:
        if resilient_client.connected:
            break
        test_status_count += 1
        if test_status_count > 5:
            log.error("too many status retries")
            raise UtilError("too many status retries")

        log.warn("status retry delay")
        gevent.sleep(10.0)

    delivery_channel = resilient_client.queue_message_for_send(request, data)
    reply, data = delivery_channel.get()

    pull_server.kill()
    resilient_client.kill()
    pull_server.join()
    resilient_client.join()

    context.term()
    return reply, data
Example #3
class TestHandoffServer(unittest.TestCase):
    """test message handling in handoff server"""
    def setUp(self):
        if not hasattr(self, "_log"):
            self._log = logging.getLogger("TestHandoffServer")

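        # run tearDown first so leftovers from a previous run are cleared
        # before the test cluster is built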
        self.tearDown()
        database_connection = get_central_connection()
        cluster_row = get_cluster_row(database_connection)
        node_rows = get_node_rows(database_connection, cluster_row.id)
        database_connection.close()

        self._key_generator = generate_key()

        self._event_publisher_processes = list()
        self._data_writer_processes = list()
        self._data_reader_processes = list()
        self._handoff_server_processes = list()

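        # start a full set of backend processes for each simulated node:
        # event publisher, data writer, data reader, and handoff server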
        for i in xrange(_node_count):
            node_name = _generate_node_name(i)
            repository_path = _repository_path(node_name)
            os.makedirs(repository_path)

            process = start_event_publisher(node_name,
                                            _event_publisher_pull_addresses[i],
                                            _event_publisher_pub_addresses[i])
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._event_publisher_processes.append(process)
            time.sleep(1.0)

            process = start_data_writer(_cluster_name, node_name,
                                        _data_writer_addresses[i],
                                        _event_publisher_pull_addresses[i],
                                        repository_path)
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._data_writer_processes.append(process)
            time.sleep(1.0)

            process = start_data_reader(node_name, _data_reader_addresses[i],
                                        _event_publisher_pull_addresses[i],
                                        repository_path)
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._data_reader_processes.append(process)
            time.sleep(1.0)

            process = start_handoff_server(
                _cluster_name, node_name, _handoff_server_addresses,
                _handoff_server_pipeline_addresses[i], _data_reader_addresses,
                _data_writer_addresses, _event_publisher_pull_addresses[i],
                _repository_path(node_name))
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._handoff_server_processes.append(process)
            time.sleep(1.0)

        self._context = zmq.Context()
        self._pollster = GreenletZeroMQPollster()
        self._deliverator = Deliverator()

        self._pull_server = GreenletPULLServer(self._context, _client_address,
                                               self._deliverator)
        self._pull_server.register(self._pollster)

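        # choose two backup nodes at random, excluding the first node
        # (the node whose writes will be handed off)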
        backup_nodes = random.sample(node_rows[1:], 2)
        self._log.debug("backup nodes = %s" % ([n.name
                                                for n in backup_nodes], ))

        self._resilient_clients = list()
        for node_row, address in zip(node_rows, _data_writer_addresses):
            if node_row not in backup_nodes:
                continue
            resilient_client = GreenletResilientClient(
                self._context,
                self._pollster,
                node_row.name,
                address,
                _local_node_name,
                _client_address,
                self._deliverator,
            )
            self._resilient_clients.append(resilient_client)
        self._log.debug("%s resilient clients" %
                        (len(self._resilient_clients), ))

        self._data_writer_handoff_client = DataWriterHandoffClient(
            node_rows[0].name, self._resilient_clients)

        self._pollster.start()

    def tearDown(self):
        if hasattr(self, "_handoff_server_processes") \
        and self._handoff_server_processes is not None:
            print >> sys.stderr, "terminating _handoff_server_processes"
            for process in self._handoff_server_processes:
                terminate_process(process)
            self._handoff_server_processes = None
        if hasattr(self, "_data_writer_processes") \
        and self._data_writer_processes is not None:
            print >> sys.stderr, "terminating _data_writer_processes"
            for process in self._data_writer_processes:
                terminate_process(process)
            self._data_writer_processes = None
        if hasattr(self, "_data_reader_processes") \
        and self._data_reader_processes is not None:
            print >> sys.stderr, "terminating _data_reader_processes"
            for process in self._data_reader_processes:
                terminate_process(process)
            self._data_reader_processes = None
        if hasattr(self, "_event_publisher_processes") \
        and self._event_publisher_processes is not None:
            print >> sys.stderr, "terminating _event_publisher_processes"
            for process in self._event_publisher_processes:
                terminate_process(process)
            self._event_publisher_processes = None

        if hasattr(self, "_pollster") \
        and self._pollster is not None:
            print >> sys.stderr, "terminating _pollster"
            self._pollster.kill()
            self._pollster.join(timeout=3.0)
            self._pollster = None

        if hasattr(self, "_pull_server") \
        and self._pull_server is not None:
            print >> sys.stderr, "terminating _pull_server"
            self._pull_server.close()
            self._pull_server = None

        if hasattr(self, "_resilient_clients") \
        and self._resilient_clients is not None:
            print >> sys.stderr, "terminating _resilient_clients"
            for client in self._resilient_clients:
                client.close()
            self._resilient_clients = None

        if hasattr(self, "_context") \
        and self._context is not None:
            print >> sys.stderr, "terminating _context"
            self._context.term()
            self._context = None

        if os.path.exists(_test_dir):
            shutil.rmtree(_test_dir)

    def test_handoff_small_content(self):
        """test retrieving content that fits in a single message"""
        file_size = 10 * 64 * 1024
        file_content = random_string(file_size)
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 5

        file_adler32 = zlib.adler32(file_content)
        file_md5 = hashlib.md5(file_content)

        message = {
            "message-type": "archive-key-entire",
            "priority": archive_priority,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": repr(timestamp),
            "segment-num": segment_num,
            "file-size": file_size,
            "file-adler32": file_adler32,
            "file-hash": b64encode(file_md5.digest()),
            "handoff-node-name": None,
        }
        g = gevent.spawn(self._send_message_get_reply, message, file_content)
        g.join(timeout=10.0)
        self.assertEqual(g.ready(), True)
        reply = g.value
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        print >> sys.stderr, "archive successful"
        print >> sys.stderr, "press [Enter] to continue"
        raw_input()


#    def test_handoff_large_content(self):
#        """test handing off content that fits in a multiple messages"""
#        segment_size = 120 * 1024
#        chunk_count = 10
#        total_size = int(1.2 * segment_size * chunk_count)
#        collection_id = 1001
#        test_data = [random_string(segment_size) for _ in range(chunk_count)]
#        key  = self._key_generator.next()
#        version_number = 0
#        segment_num = 5
#        sequence = 0
#        archive_priority = create_priority()
#        timestamp = create_timestamp()
#
#        file_adler32 = -42
#        file_md5 = "ffffffffffffffff"
#        segment_adler32 = 32
#        segment_md5 = "1111111111111111"
#
#        message = {
#            "message-type"      : "archive-key-start",
#            "priority"          : archive_priority,
#            "collection-id"     : collection_id,
#            "timestamp"         : timestamp,
#            "sequence"          : sequence,
#            "key"               : key,
#            "version-number"    : version_number,
#            "segment-num"    : segment_num,
#            "segment-size"      : segment_size,
#        }
#        g = gevent.spawn(
#            self._send_message_get_reply, message, test_data[sequence]
#        )
#        g.join(timeout=10.0)
#        self.assertEqual(g.ready(), True)
#        reply = g.value
#        self.assertEqual(reply["message-type"], "archive-key-start-reply")
#        self.assertEqual(reply["result"], "success")
#
#        for content_item in test_data[1:-1]:
#            sequence += 1
#            message = {
#                "message-type"      : "archive-key-next",
#                "priority"          : archive_priority,
#                "collection-id"     : collection_id,
#                "key"               : key,
#                "version-number"    : version_number,
#                "segment-num"       : segment_num,
#                "sequence"          : sequence,
#            }
#            g = gevent.spawn(
#                self._send_message_get_reply, message, test_data[sequence]
#            )
#            g.join(timeout=10.0)
#            self.assertEqual(g.ready(), True)
#            reply = g.value
#            self.assertEqual(reply["message-type"], "archive-key-next-reply")
#            self.assertEqual(reply["result"], "success")
#
#        sequence += 1
#        message = {
#            "message-type"      : "archive-key-final",
#            "priority"          : archive_priority,
#            "collection-id"     : collection_id,
#            "key"               : key,
#            "version-number"    : version_number,
#            "segment-num"       : segment_num,
#            "sequence"          : sequence,
#            "total-size"        : total_size,
#            "file-adler32"      : file_adler32,
#            "file-md5"          : b64encode(file_md5),
#            "segment-adler32"   : segment_adler32,
#            "segment-md5"       : b64encode(segment_md5),
#        }
#        g = gevent.spawn(
#            self._send_message_get_reply, message, test_data[sequence]
#        )
#        g.join(timeout=10.0)
#        self.assertEqual(g.ready(), True)
#        reply = g.value
#        self.assertEqual(reply["message-type"], "archive-key-final-reply")
#        self.assertEqual(reply["result"], "success")
#        self.assertEqual(reply["previous-size"], 0)
#
#        print >> sys.stderr, "archive successful: starting missing data writer"
#        self._start_missing_data_writer()
#        print >> sys.stderr, "data_writer started"
#        print >> sys.stderr, "press [Enter] to continue"
#        raw_input()

    def _send_message_get_reply(self, message, content_item):
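        """queue a message on the handoff client and block for the reply"""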
        completion_channel = \
            self._data_writer_handoff_client.queue_message_for_send(
                message, data=content_item
            )
        self._log.debug("before completion_channel.get()")
        reply, _ = completion_channel.get()
        self._log.debug("after completion_channel.get()")
        return reply
Example #4
    def setUp(self):
        if not hasattr(self, "_log"):
            self._log = logging.getLogger("TestHandoffServer")

        self.tearDown()
        database_connection = get_central_connection()
        cluster_row = get_cluster_row(database_connection)
        node_rows = get_node_rows(database_connection, cluster_row.id)
        database_connection.close()

        self._key_generator = generate_key()

        self._event_publisher_processes = list()
        self._data_writer_processes = list()
        self._data_reader_processes = list()
        self._handoff_server_processes = list()

        for i in xrange(_node_count):
            node_name = _generate_node_name(i)
            repository_path = _repository_path(node_name)
            os.makedirs(repository_path)

            process = start_event_publisher(node_name,
                                            _event_publisher_pull_addresses[i],
                                            _event_publisher_pub_addresses[i])
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._event_publisher_processes.append(process)
            time.sleep(1.0)

            process = start_data_writer(_cluster_name, node_name,
                                        _data_writer_addresses[i],
                                        _event_publisher_pull_addresses[i],
                                        repository_path)
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._data_writer_processes.append(process)
            time.sleep(1.0)

            process = start_data_reader(node_name, _data_reader_addresses[i],
                                        _event_publisher_pull_addresses[i],
                                        repository_path)
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._data_reader_processes.append(process)
            time.sleep(1.0)

            process = start_handoff_server(
                _cluster_name, node_name, _handoff_server_addresses,
                _handoff_server_pipeline_addresses[i], _data_reader_addresses,
                _data_writer_addresses, _event_publisher_pull_addresses[i],
                _repository_path(node_name))
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._handoff_server_processes.append(process)
            time.sleep(1.0)

        self._context = zmq.Context()
        self._pollster = GreenletZeroMQPollster()
        self._deliverator = Deliverator()

        self._pull_server = GreenletPULLServer(self._context, _client_address,
                                               self._deliverator)
        self._pull_server.register(self._pollster)

        backup_nodes = random.sample(node_rows[1:], 2)
        self._log.debug("backup nodes = %s" % ([n.name
                                                for n in backup_nodes], ))

        self._resilient_clients = list()
        for node_row, address in zip(node_rows, _data_writer_addresses):
            if node_row not in backup_nodes:
                continue
            resilient_client = GreenletResilientClient(
                self._context,
                self._pollster,
                node_row.name,
                address,
                _local_node_name,
                _client_address,
                self._deliverator,
            )
            self._resilient_clients.append(resilient_client)
        self._log.debug("%s resilient clients" %
                        (len(self._resilient_clients), ))

        self._data_writer_handoff_client = DataWriterHandoffClient(
            node_rows[0].name, self._resilient_clients)

        self._pollster.start()
    def __init__(self):
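        """
        Wire up the web internal reader: database connections, a PULL
        server for replies, a resilient client and DataReader per node,
        space accounting clients, an event push client, a watcher, and
        the WSGI application served by gevent's WSGIServer.
        """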
        self._log = logging.getLogger("WebInternalReader")

        memcached_client = memcache.Client(_memcached_nodes)

        self._central_connection = get_central_connection()
        self._cluster_row = get_cluster_row(self._central_connection)
        self._node_local_connection = get_node_local_connection()
        self._deliverator = Deliverator()

        self._zeromq_context = zmq.Context()

        self._pull_server = GreenletPULLServer(
            self._zeromq_context, 
            _web_internal_reader_pipeline_address,
            self._deliverator
        )
        self._pull_server.link_exception(self._unhandled_greenlet_exception)

        self._data_reader_clients = list()
        self._data_readers = list()
        for node_name, address in zip(_node_names, _data_reader_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_internal_reader_pipeline_address,
                self._deliverator,
                connect_messages=[]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_reader_clients.append(resilient_client)
            data_reader = DataReader(
                node_name, resilient_client
            )
            self._data_readers.append(data_reader)

        self._space_accounting_dealer_client = GreenletDealerClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_server_address
        )
        self._space_accounting_dealer_client.link_exception(
            self._unhandled_greenlet_exception
        )

        push_client = GreenletPUSHClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_pipeline_address,
        )

        self._accounting_client = SpaceAccountingClient(
            _local_node_name,
            self._space_accounting_dealer_client,
            push_client
        )

        self._event_push_client = EventPushClient(
            self._zeromq_context,
            "web-internal-reader"
        )

        # message sent to data readers telling them the server
        # is (re)starting, thereby invalidating any archives or retrievals
        # that are in progress for this node
        timestamp = create_timestamp()
        self._event_push_client.info("web-reader-start",
                                     "web reader (re)start",
                                     timestamp_repr=repr(timestamp),
                                     source_node_name=_local_node_name)

        self._watcher = Watcher(
            _stats, 
            self._data_reader_clients,
            self._event_push_client
        )

        self.application = Application(
            memcached_client,
            self._central_connection,
            self._node_local_connection,
            self._cluster_row,
            self._data_readers,
            self._accounting_client,
            self._event_push_client,
            _stats
        )
        self.wsgi_server = WSGIServer(
            (_web_internal_reader_host, _web_internal_reader_port), 
            application=self.application,
            backlog=_wsgi_backlog
        )
    def __init__(self):
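        """
        Wire up the web server: database connections, a unified-id
        factory, a PULL server for replies, resilient clients to every
        data writer and data reader, space accounting clients, an event
        push client, the internal id translator, and the WSGI application.
        """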
        self._log = logging.getLogger("WebServer")
        authenticator = SqlAuthenticator()

        self._central_connection = get_central_connection()
        self._cluster_row = get_cluster_row(self._central_connection)
        self._node_local_connection = get_node_local_connection()
        self._unified_id_factory = UnifiedIDFactory(
            self._central_connection,
            _get_shard_id(self._central_connection, self._cluster_row.id)
        )
        self._deliverator = Deliverator()

        self._zeromq_context = zmq.Context()

        self._pull_server = GreenletPULLServer(
            self._zeromq_context, 
            _web_server_pipeline_address,
            self._deliverator
        )
        self._pull_server.link_exception(self._unhandled_greenlet_exception)

        # message sent to data readers and writers telling them the server
        # is (re)starting, thereby invalidating any archives or retrievals
        # that are in progress for this node
        timestamp = create_timestamp()
        start_message = {
            "message-type"              : "web-server-start",
            "priority"                  : create_priority(),
            "unified-id"                : self._unified_id_factory.next(),
            "timestamp-repr"            : repr(timestamp),
            "source-node-name"          : _local_node_name,
        }

        self._data_writer_clients = list()
        for node_name, address in zip(_node_names, _data_writer_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_server_pipeline_address,
                self._deliverator,
                connect_messages=[start_message, ]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_writer_clients.append(resilient_client)

        self._data_reader_clients = list()
        self._data_readers = list()
        for node_name, address in zip(_node_names, _data_reader_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_server_pipeline_address,
                self._deliverator,
                connect_messages=[start_message, ]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_reader_clients.append(resilient_client)
            data_reader = DataReader(
                node_name, resilient_client
            )
            self._data_readers.append(data_reader)

        self._space_accounting_dealer_client = GreenletDealerClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_server_address
        )
        self._space_accounting_dealer_client.link_exception(
            self._unhandled_greenlet_exception
        )

        push_client = GreenletPUSHClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_pipeline_address,
        )

        self._accounting_client = SpaceAccountingClient(
            _local_node_name,
            self._space_accounting_dealer_client,
            push_client
        )

        self._event_push_client = EventPushClient(
            self._zeromq_context,
            "web-server"
        )

        self._watcher = Watcher(
            _stats, 
            self._data_reader_clients,
            self._data_writer_clients,
            self._event_push_client
        )

        id_translator_keys_path = os.path.join(
            _repository_path, "id_translator_keys.pkl"
        )
        with open(id_translator_keys_path, "r") as input_file:
            id_translator_keys = pickle.load(input_file)

        self._id_translator = InternalIDTranslator(
            id_translator_keys["key"],
            id_translator_keys["hmac_key"], 
            id_translator_keys["iv_key"],
            id_translator_keys["hmac_size"]
        )
        self.application = Application(
            self._central_connection,
            self._node_local_connection,
            self._cluster_row,
            self._unified_id_factory,
            self._id_translator,
            self._data_writer_clients,
            self._data_readers,
            authenticator,
            self._accounting_client,
            self._event_push_client,
            _stats
        )
        self.wsgi_server = WSGIServer(
            (_web_server_host, _web_server_port), 
            application=self.application,
            backlog=_wsgi_backlog
        )
    def setUp(self):
        if not hasattr(self, "_log"):
            self._log = logging.getLogger("TestHandoffServer")

        self.tearDown()
        database_connection = get_central_connection()
        cluster_row = get_cluster_row(database_connection)
        node_rows = get_node_rows(database_connection, cluster_row.id)
        database_connection.close()

        self._key_generator = generate_key()

        self._event_publisher_processes = list()
        self._data_writer_processes = list()
        self._data_reader_processes = list()
        self._handoff_server_processes = list()

        for i in xrange(_node_count):
            node_name = _generate_node_name(i)
            repository_path = _repository_path(node_name)
            os.makedirs(repository_path)
            
            process = start_event_publisher(
                node_name, 
                _event_publisher_pull_addresses[i],
                _event_publisher_pub_addresses[i]
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._event_publisher_processes.append(process)
            time.sleep(1.0)

            process = start_data_writer(
                _cluster_name,
                node_name, 
                _data_writer_addresses[i],
                _event_publisher_pull_addresses[i],
                repository_path
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._data_writer_processes.append(process)
            time.sleep(1.0)

            process = start_data_reader(
                node_name, 
                _data_reader_addresses[i],
                _event_publisher_pull_addresses[i], 
                repository_path
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._data_reader_processes.append(process)
            time.sleep(1.0)

            process = start_handoff_server(
                _cluster_name,
                node_name, 
                _handoff_server_addresses,
                _handoff_server_pipeline_addresses[i],
                _data_reader_addresses,
                _data_writer_addresses,
                _event_publisher_pull_addresses[i], 
                _repository_path(node_name)
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._handoff_server_processes.append(process)
            time.sleep(1.0)

        self._context = zmq.Context()
        self._pollster = GreenletZeroMQPollster()
        self._deliverator = Deliverator()

        self._pull_server = GreenletPULLServer(
            self._context, 
            _client_address,
            self._deliverator
        )
        self._pull_server.register(self._pollster)

        backup_nodes = random.sample(node_rows[1:], 2)
        self._log.debug("backup nodes = %s" % (
            [n.name for n in backup_nodes], 
        ))

        self._resilient_clients = list()        
        for node_row, address in zip(node_rows, _data_writer_addresses):
            if node_row not in backup_nodes:
                continue
            resilient_client = GreenletResilientClient(
                self._context,
                self._pollster,
                node_row.name,
                address,
                _local_node_name,
                _client_address,
                self._deliverator,
            )
            self._resilient_clients.append(resilient_client)
        self._log.debug("%s resilient clients" % (
            len(self._resilient_clients), 
        ))

        self._data_writer_handoff_client = DataWriterHandoffClient(
            node_rows[0].name,
            self._resilient_clients
        )

        self._pollster.start()
class TestHandoffServer(unittest.TestCase):
    """test message handling in handoff server"""

    def setUp(self):
        if not hasattr(self, "_log"):
            self._log = logging.getLogger("TestHandoffServer")

        self.tearDown()
        database_connection = get_central_connection()
        cluster_row = get_cluster_row(database_connection)
        node_rows = get_node_rows(database_connection, cluster_row.id)
        database_connection.close()

        self._key_generator = generate_key()

        self._event_publisher_processes = list()
        self._data_writer_processes = list()
        self._data_reader_processes = list()
        self._handoff_server_processes = list()

        for i in xrange(_node_count):
            node_name = _generate_node_name(i)
            repository_path = _repository_path(node_name)
            os.makedirs(repository_path)
            
            process = start_event_publisher(
                node_name, 
                _event_publisher_pull_addresses[i],
                _event_publisher_pub_addresses[i]
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._event_publisher_processes.append(process)
            time.sleep(1.0)

            process = start_data_writer(
                _cluster_name,
                node_name, 
                _data_writer_addresses[i],
                _event_publisher_pull_addresses[i],
                repository_path
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._data_writer_processes.append(process)
            time.sleep(1.0)

            process = start_data_reader(
                node_name, 
                _data_reader_addresses[i],
                _event_publisher_pull_addresses[i], 
                repository_path
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._data_reader_processes.append(process)
            time.sleep(1.0)

            process = start_handoff_server(
                _cluster_name,
                node_name, 
                _handoff_server_addresses,
                _handoff_server_pipeline_addresses[i],
                _data_reader_addresses,
                _data_writer_addresses,
                _event_publisher_pull_addresses[i], 
                _repository_path(node_name)
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._handoff_server_processes.append(process)
            time.sleep(1.0)

        self._context = zmq.Context()
        self._pollster = GreenletZeroMQPollster()
        self._deliverator = Deliverator()

        self._pull_server = GreenletPULLServer(
            self._context, 
            _client_address,
            self._deliverator
        )
        self._pull_server.register(self._pollster)

        backup_nodes = random.sample(node_rows[1:], 2)
        self._log.debug("backup nodes = %s" % (
            [n.name for n in backup_nodes], 
        ))

        self._resilient_clients = list()        
        for node_row, address in zip(node_rows, _data_writer_addresses):
            if node_row not in backup_nodes:
                continue
            resilient_client = GreenletResilientClient(
                self._context,
                self._pollster,
                node_row.name,
                address,
                _local_node_name,
                _client_address,
                self._deliverator,
            )
            self._resilient_clients.append(resilient_client)
        self._log.debug("%s resilient clients" % (
            len(self._resilient_clients), 
        ))

        self._data_writer_handoff_client = DataWriterHandoffClient(
            node_rows[0].name,
            self._resilient_clients
        )

        self._pollster.start()

    def tearDown(self):
        if hasattr(self, "_handoff_server_processes") \
        and self._handoff_server_processes is not None:
            print >> sys.stderr, "terminating _handoff_server_processes"
            for process in self._handoff_server_processes:
                terminate_process(process)
            self._handoff_server_processes = None
        if hasattr(self, "_data_writer_processes") \
        and self._data_writer_processes is not None:
            print >> sys.stderr, "terminating _data_writer_processes"
            for process in self._data_writer_processes:
                terminate_process(process)
            self._data_writer_processes = None
        if hasattr(self, "_data_reader_processes") \
        and self._data_reader_processes is not None:
            print >> sys.stderr, "terminating _data_reader_processes"
            for process in self._data_reader_processes:
                terminate_process(process)
            self._data_reader_processes = None
        if hasattr(self, "_event_publisher_processes") \
        and self._event_publisher_processes is not None:
            print >> sys.stderr, "terminating _event_publisher_processes"
            for process in self._event_publisher_processes:
                terminate_process(process)
            self._event_publisher_processes = None

        if hasattr(self, "_pollster") \
        and self._pollster is not None:
            print >> sys.stderr, "terminating _pollster"
            self._pollster.kill()
            self._pollster.join(timeout=3.0)
            self._pollster = None
        
        if hasattr(self, "_pull_server") \
        and self._pull_server is not None:
            print >> sys.stderr, "terminating _pull_server"
            self._pull_server.close()
            self._pull_server = None
 
        if hasattr(self, "_resilient_clients") \
        and self._resilient_clients is not None:
            print >> sys.stderr, "terminating _resilient_clients"
            for client in self._resilient_clients:
                client.close()
            self._resilient_clients = None
 
        if hasattr(self, "_context") \
        and self._context is not None:
            print >> sys.stderr, "terminating _context"
            self._context.term()
            self._context = None

        if os.path.exists(_test_dir):
            shutil.rmtree(_test_dir)

    def test_handoff_small_content(self):
        """test retrieving content that fits in a single message"""
        file_size = 10 * 64 * 1024
        file_content = random_string(file_size)
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 5

        file_adler32 = zlib.adler32(file_content)
        file_md5 = hashlib.md5(file_content)

        message = {
            "message-type"      : "archive-key-entire",
            "priority"          : archive_priority,
            "collection-id"     : collection_id,
            "key"               : key, 
            "timestamp-repr"    : repr(timestamp),
            "segment-num"       : segment_num,
            "file-size"         : file_size,
            "file-adler32"      : file_adler32,
            "file-hash"         : b64encode(file_md5.digest()),
            "handoff-node-name" : None,
        }
        g = gevent.spawn(self._send_message_get_reply, message, file_content)
        g.join(timeout=10.0)
        self.assertEqual(g.ready(), True)
        reply = g.value
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        print >> sys.stderr, "archive successful"
        print >> sys.stderr, "press [Enter] to continue" 
        raw_input()

#    def test_handoff_large_content(self):
#        """test handing off content that fits in a multiple messages"""
#        segment_size = 120 * 1024
#        chunk_count = 10
#        total_size = int(1.2 * segment_size * chunk_count)
#        collection_id = 1001
#        test_data = [random_string(segment_size) for _ in range(chunk_count)]
#        key  = self._key_generator.next()
#        version_number = 0
#        segment_num = 5
#        sequence = 0
#        archive_priority = create_priority()
#        timestamp = create_timestamp()
#
#        file_adler32 = -42
#        file_md5 = "ffffffffffffffff"
#        segment_adler32 = 32
#        segment_md5 = "1111111111111111"
#
#        message = {
#            "message-type"      : "archive-key-start",
#            "priority"          : archive_priority,
#            "collection-id"     : collection_id,
#            "timestamp"         : timestamp,
#            "sequence"          : sequence,
#            "key"               : key, 
#            "version-number"    : version_number,
#            "segment-num"    : segment_num,
#            "segment-size"      : segment_size,
#        }
#        g = gevent.spawn(
#            self._send_message_get_reply, message, test_data[sequence]
#        )
#        g.join(timeout=10.0)
#        self.assertEqual(g.ready(), True)
#        reply = g.value
#        self.assertEqual(reply["message-type"], "archive-key-start-reply")
#        self.assertEqual(reply["result"], "success")
#
#        for content_item in test_data[1:-1]:
#            sequence += 1
#            message = {
#                "message-type"      : "archive-key-next",
#                "priority"          : archive_priority,
#                "collection-id"     : collection_id,
#                "key"               : key,
#                "version-number"    : version_number,
#                "segment-num"       : segment_num,
#                "sequence"          : sequence,
#            }
#            g = gevent.spawn(
#                self._send_message_get_reply, message, test_data[sequence]
#            )
#            g.join(timeout=10.0)
#            self.assertEqual(g.ready(), True)
#            reply = g.value
#            self.assertEqual(reply["message-type"], "archive-key-next-reply")
#            self.assertEqual(reply["result"], "success")
#        
#        sequence += 1
#        message = {
#            "message-type"      : "archive-key-final",
#            "priority"          : archive_priority,
#            "collection-id"     : collection_id,
#            "key"               : key,
#            "version-number"    : version_number,
#            "segment-num"       : segment_num,
#            "sequence"          : sequence,
#            "total-size"        : total_size,
#            "file-adler32"      : file_adler32,
#            "file-md5"          : b64encode(file_md5),
#            "segment-adler32"   : segment_adler32,
#            "segment-md5"       : b64encode(segment_md5),
#        }
#        g = gevent.spawn(
#            self._send_message_get_reply, message, test_data[sequence]
#        )
#        g.join(timeout=10.0)
#        self.assertEqual(g.ready(), True)
#        reply = g.value
#        self.assertEqual(reply["message-type"], "archive-key-final-reply")
#        self.assertEqual(reply["result"], "success")
#        self.assertEqual(reply["previous-size"], 0)
#
#        print >> sys.stderr, "archive successful: starting missing data writer"
#        self._start_missing_data_writer()
#        print >> sys.stderr, "data_writer started"
#        print >> sys.stderr, "press [Enter] to continue" 
#        raw_input()

    def _send_message_get_reply(self, message, content_item):
        completion_channel = \
            self._data_writer_handoff_client.queue_message_for_send(
                message, data=content_item
            )
        self._log.debug("before completion_channel.get()")
        reply, _ = completion_channel.get()
        self._log.debug("after completion_channel.get()")
        return reply
    def __init__(self, halt_event):
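        """
        Wire up the web writer: a database interaction pool, a PULL
        server for replies, resilient clients to every data writer,
        space accounting clients, an event push client, the internal id
        translator, a redis stats sink, and the WSGI application.
        """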
        self._log = logging.getLogger("WebWriter")
        memcached_client = memcache.Client(_memcached_nodes)

        self._interaction_pool = gdbpool.interaction_pool.DBInteractionPool(
            get_central_database_dsn(), 
            pool_name=_central_pool_name,
            pool_size=_database_pool_size, 
            do_log=True)

        authenticator = InteractionPoolAuthenticator(memcached_client, 
                                                     self._interaction_pool)

        # Ticket #25: must run database operation in a greenlet
        greenlet = gevent.Greenlet.spawn(_get_cluster_row_and_node_row,
                                         self._interaction_pool)
        greenlet.join()
        self._cluster_row, node_row = greenlet.get()

        self._unified_id_factory = UnifiedIDFactory(node_row.id)

        self._deliverator = Deliverator()

        self._zeromq_context = zmq.Context()

        self._pull_server = GreenletPULLServer(
            self._zeromq_context, 
            _web_writer_pipeliner_address,
            self._deliverator
        )
        self._pull_server.link_exception(self._unhandled_greenlet_exception)

        self._data_writer_clients = list()
        for node_name, address in zip(_node_names, _data_writer_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_writer_pipeliner_address,
                self._deliverator,
                connect_messages=[]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_writer_clients.append(resilient_client)

        self._space_accounting_dealer_client = GreenletDealerClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_server_address
        )
        self._space_accounting_dealer_client.link_exception(
            self._unhandled_greenlet_exception
        )

        push_client = GreenletPUSHClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_pipeline_address,
        )

        self._accounting_client = SpaceAccountingClient(
            _local_node_name,
            self._space_accounting_dealer_client,
            push_client
        )

        self._event_push_client = EventPushClient(
            self._zeromq_context,
            "web-server"
        )

        # message sent to data writers telling them the server
        # is (re)starting, thereby invalidating any archives
        # that are in progress for this node
        unified_id = self._unified_id_factory.next()
        timestamp = create_timestamp()
        self._event_push_client.info("web-writer-start",
                                     "web writer (re)start",
                                     unified_id=unified_id,
                                     timestamp_repr=repr(timestamp),
                                     source_node_name=_local_node_name)

        id_translator_keys_path = os.environ.get(
            "NIMBUS_IO_ID_TRANSLATION_KEYS", 
            os.path.join(_repository_path, "id_translator_keys.pkl"))
        with open(id_translator_keys_path, "r") as input_file:
            id_translator_keys = pickle.load(input_file)

        self._id_translator = InternalIDTranslator(
            id_translator_keys["key"],
            id_translator_keys["hmac_key"], 
            id_translator_keys["iv_key"],
            id_translator_keys["hmac_size"]
        )

        redis_queue = gevent.queue.Queue()

        self._redis_sink = OperationalStatsRedisSink(halt_event, 
                                                     redis_queue,
                                                     _local_node_name)
        self._redis_sink.link_exception(self._unhandled_greenlet_exception)

        self.application = Application(
            self._cluster_row,
            self._unified_id_factory,
            self._id_translator,
            self._data_writer_clients,
            authenticator,
            self._accounting_client,
            self._event_push_client,
            redis_queue
        )
        self.wsgi_server = WSGIServer((_web_writer_host, _web_writer_port),
                                      application=self.application,
                                      backlog=_wsgi_backlog)
Example #10
    def __init__(self, halt_event):
        self._log = logging.getLogger("WebWriter")
        memcached_client = memcache.Client(_memcached_nodes)

        self._interaction_pool = gdbpool.interaction_pool.DBInteractionPool(
            get_central_database_dsn(),
            pool_name=_central_pool_name,
            pool_size=_database_pool_size,
            do_log=True)

        authenticator = InteractionPoolAuthenticator(memcached_client,
                                                     self._interaction_pool)

        # Ticket #25: must run database operation in a greenlet
        greenlet = gevent.Greenlet.spawn(_get_cluster_row_and_node_row,
                                         self._interaction_pool)
        greenlet.join()
        self._cluster_row, node_row = greenlet.get()

        self._unified_id_factory = UnifiedIDFactory(node_row.id)

        self._deliverator = Deliverator()

        self._zeromq_context = zmq.Context()

        self._pull_server = GreenletPULLServer(self._zeromq_context,
                                               _web_writer_pipeliner_address,
                                               self._deliverator)
        self._pull_server.link_exception(self._unhandled_greenlet_exception)

        self._data_writer_clients = list()
        for node_name, address in zip(_node_names, _data_writer_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context,
                node_name,
                address,
                _client_tag,
                _web_writer_pipeliner_address,
                self._deliverator,
                connect_messages=[])
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_writer_clients.append(resilient_client)

        self._space_accounting_dealer_client = GreenletDealerClient(
            self._zeromq_context, _local_node_name,
            _space_accounting_server_address)
        self._space_accounting_dealer_client.link_exception(
            self._unhandled_greenlet_exception)

        push_client = GreenletPUSHClient(
            self._zeromq_context,
            _local_node_name,
            _space_accounting_pipeline_address,
        )

        self._accounting_client = SpaceAccountingClient(
            _local_node_name, self._space_accounting_dealer_client,
            push_client)

        self._event_push_client = EventPushClient(self._zeromq_context,
                                                  "web-server")

        # message sent to data writers telling them the server
        # is (re)starting, thereby invalidating any archives
        # that are in progress for this node
        unified_id = self._unified_id_factory.next()
        timestamp = create_timestamp()
        self._event_push_client.info("web-writer-start",
                                     "web writer (re)start",
                                     unified_id=unified_id,
                                     timestamp_repr=repr(timestamp),
                                     source_node_name=_local_node_name)

        id_translator_keys_path = os.environ.get(
            "NIMBUS_IO_ID_TRANSLATION_KEYS",
            os.path.join(_repository_path, "id_translator_keys.pkl"))
        with open(id_translator_keys_path, "r") as input_file:
            id_translator_keys = pickle.load(input_file)

        self._id_translator = InternalIDTranslator(
            id_translator_keys["key"], id_translator_keys["hmac_key"],
            id_translator_keys["iv_key"], id_translator_keys["hmac_size"])

        redis_queue = gevent.queue.Queue()

        self._redis_sink = OperationalStatsRedisSink(halt_event, redis_queue,
                                                     _local_node_name)
        self._redis_sink.link_exception(self._unhandled_greenlet_exception)

        self.application = Application(self._cluster_row,
                                       self._unified_id_factory,
                                       self._id_translator,
                                       self._data_writer_clients,
                                       authenticator, self._accounting_client,
                                       self._event_push_client, redis_queue)
        self.wsgi_server = WSGIServer((_web_writer_host, _web_writer_port),
                                      application=self.application,
                                      backlog=_wsgi_backlog)
Example #11
    def __init__(self):
        self._log = logging.getLogger("WebInternalReader")

        memcached_client = memcache.Client(_memcached_nodes)

        self._central_connection = get_central_connection()
        self._cluster_row = get_cluster_row(self._central_connection)
        self._node_local_connection = get_node_local_connection()
        self._deliverator = Deliverator()

        self._zeromq_context = zmq.Context()

        self._pull_server = GreenletPULLServer(
            self._zeromq_context, _web_internal_reader_pipeline_address,
            self._deliverator)
        self._pull_server.link_exception(self._unhandled_greenlet_exception)

        self._data_reader_clients = list()
        self._data_readers = list()
        for node_name, address in zip(_node_names, _data_reader_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context,
                node_name,
                address,
                _client_tag,
                _web_internal_reader_pipeline_address,
                self._deliverator,
                connect_messages=[])
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_reader_clients.append(resilient_client)
            data_reader = DataReader(node_name, resilient_client)
            self._data_readers.append(data_reader)

        self._space_accounting_dealer_client = GreenletDealerClient(
            self._zeromq_context, _local_node_name,
            _space_accounting_server_address)
        self._space_accounting_dealer_client.link_exception(
            self._unhandled_greenlet_exception)

        push_client = GreenletPUSHClient(
            self._zeromq_context,
            _local_node_name,
            _space_accounting_pipeline_address,
        )

        self._accounting_client = SpaceAccountingClient(
            _local_node_name, self._space_accounting_dealer_client,
            push_client)

        self._event_push_client = EventPushClient(self._zeromq_context,
                                                  "web-internal-reader")

        # message sent to data readers telling them the server
        # is (re)starting, thereby invalidating any archives or retrievals
        # that are in progress for this node
        timestamp = create_timestamp()
        self._event_push_client.info("web-reader-start",
                                     "web reader (re)start",
                                     timestamp_repr=repr(timestamp),
                                     source_node_name=_local_node_name)

        self._watcher = Watcher(_stats, self._data_reader_clients,
                                self._event_push_client)

        self.application = Application(memcached_client,
                                       self._central_connection,
                                       self._node_local_connection,
                                       self._cluster_row, self._data_readers,
                                       self._accounting_client,
                                       self._event_push_client, _stats)
        self.wsgi_server = WSGIServer(
            (_web_internal_reader_host, _web_internal_reader_port),
            application=self.application,
            backlog=_wsgi_backlog)