def test_simple_output_value_file(self):
    """write one sequence to a value file and verify the persisted row"""
    collection_id = 1001
    segment_id = 42
    data_size = 1024
    data = random_string(data_size)

    value_file = OutputValueFile(
        self._database_connection, _repository_path
    )
    # a freshly opened value file contains no data yet
    self.assertEqual(value_file.size, 0)

    value_file.write_data_for_one_sequence(
        collection_id, segment_id, data
    )
    self.assertEqual(value_file.size, data_size)
    value_file.close()

    # closing should persist an accurate row in the database
    row = _retrieve_value_file_row(
        self._database_connection, value_file._value_file_id
    )
    expected_md5 = hashlib.md5(data).digest()
    self.assertEqual(row.size, data_size)
    self.assertEqual(str(row.hash), expected_md5)
    self.assertEqual(row.sequence_count, 1)
    self.assertEqual(row.min_segment_id, segment_id)
    self.assertEqual(row.max_segment_id, segment_id)
    self.assertEqual(row.distinct_collection_count, 1)
    self.assertEqual(row.collection_ids, [collection_id, ])
def test_handoff_small_content(self):
    """archive content that fits in a single message and check the reply"""
    file_size = 10 * 64 * 1024
    file_content = random_string(file_size)
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 5

    file_adler32 = zlib.adler32(file_content)
    file_md5 = hashlib.md5(file_content)

    message = dict()
    message["message-type"] = "archive-key-entire"
    message["priority"] = archive_priority
    message["collection-id"] = collection_id
    message["key"] = key
    message["timestamp-repr"] = repr(timestamp)
    message["segment-num"] = segment_num
    message["file-size"] = file_size
    message["file-adler32"] = file_adler32
    message["file-hash"] = b64encode(file_md5.digest())
    message["handoff-node-name"] = None

    # run the exchange in a greenlet, bounded by a ten second timeout
    greenlet = gevent.spawn(
        self._send_message_get_reply, message, file_content
    )
    greenlet.join(timeout=10.0)
    self.assertEqual(greenlet.ready(), True)

    reply = greenlet.value
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # pause so an operator can inspect server state (python 2 idiom)
    print >> sys.stderr, "archive successful"
    print >> sys.stderr, "press [Enter] to continue"
    raw_input()
def test_handoff_small_content(self):
    """archive content small enough for one message and verify success"""
    file_size = 10 * 64 * 1024
    file_content = random_string(file_size)
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 5
    file_adler32 = zlib.adler32(file_content)
    file_md5 = hashlib.md5(file_content)

    message = {
        "message-type": "archive-key-entire",
        "priority": archive_priority,
        "collection-id": collection_id,
        "key": key,
        "timestamp-repr": repr(timestamp),
        "segment-num": segment_num,
        "file-size": file_size,
        "file-adler32": file_adler32,
        "file-hash": b64encode(file_md5.digest()),
        "handoff-node-name": None,
    }

    # send asynchronously and wait at most ten seconds for completion
    worker = gevent.spawn(self._send_message_get_reply, message, file_content)
    worker.join(timeout=10.0)
    self.assertEqual(worker.ready(), True)
    reply = worker.value
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # interactive pause for manual inspection (python 2 print/raw_input)
    print >> sys.stderr, "archive successful"
    print >> sys.stderr, "press [Enter] to continue"
    raw_input()
def _generate_key():
    """Generate a random key string with base64 '=' padding removed.

    base64.b64encode returns str on python 2 and bytes on python 3;
    both branches strip trailing '=' padding characters.
    """
    result = base64.b64encode(random_string(32))
    if isinstance(result, str):
        # BUG FIX: rstrip returns a new string; the original discarded
        # the return value, so padding was never actually removed
        result = result.rstrip("=")
    else:
        # bytes indexing yields ints on python 3
        while result[-1] == ord('='):
            result = result[:-1]
    return result
def test_simple_segment(self):
    """test writing and reading a simple segment of one sequence"""
    collection_id = 1001
    key = "aaa/bbb/ccc"
    timestamp = create_timestamp()
    segment_num = 42
    sequence_num = 0
    data_size = 1024
    data = random_string(data_size)
    data_adler32 = zlib.adler32(data)
    data_md5 = hashlib.md5(data)
    file_tombstone = False

    # write one segment consisting of a single sequence
    # (the original also built an unused Reader here; removed)
    writer = Writer(self._database_connection, _repository_path)
    writer.start_new_segment(collection_id, key, repr(timestamp), segment_num)
    writer.store_sequence(
        collection_id, key, repr(timestamp), segment_num, sequence_num, data
    )
    writer.finish_new_segment(
        collection_id,
        key,
        repr(timestamp),
        segment_num,
        data_size,
        data_adler32,
        data_md5.digest(),
        file_tombstone,
        handoff_node_id=None,
    )
    writer.close()

    # the database row must reflect what was written
    file_info = most_recent_timestamp_for_key(
        self._database_connection, collection_id, key
    )
    self.assertEqual(file_info.file_size, data_size)
    self.assertEqual(file_info.file_adler32, data_adler32)
    self.assertEqual(str(file_info.file_hash), data_md5.digest())
    self.assertEqual(file_info.file_tombstone, file_tombstone)

    # read the segment back: first yield is the sequence count,
    # subsequent yields are the sequence data
    reader = Reader(self._database_connection, _repository_path)
    sequence_generator = reader.generate_all_sequence_rows_for_segment(
        collection_id, key, file_info.timestamp, file_info.segment_num
    )
    sequence_count = sequence_generator.next()
    self.assertEqual(sequence_count, 1)
    sequence_data = sequence_generator.next()
    self.assertEqual(len(sequence_data), len(data))
    self.assertEqual(sequence_data, data)
def xxxtest_archive_key_entire_with_meta(self):
    """test archiving a key in a single message, including meta data"""
    file_size = 10 * 64 * 1024
    content_item = random_string(file_size)
    user_request_id = uuid.uuid1().hex
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2

    # meta data travels in the same message under a prefixed key
    meta_key = "".join([nimbus_meta_prefix, "test_key"])
    meta_value = "pork"

    file_adler32 = zlib.adler32(content_item)
    file_md5 = hashlib.md5(content_item)

    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()

    message = dict()
    message["message-type"] = "archive-key-entire"
    message["priority"] = archive_priority
    message["user-request-id"] = user_request_id
    message["collection-id"] = collection_id
    message["key"] = key
    message["unified-id"] = unified_id
    message["timestamp-repr"] = repr(timestamp)
    message["conjoined-part"] = 0
    message["segment-num"] = segment_num
    message["segment-size"] = file_size
    message["zfec-padding-size"] = 4
    message["segment-adler32"] = file_adler32
    message["segment-md5-digest"] = b64encode(file_md5.digest())
    message["file-size"] = file_size
    message["file-adler32"] = file_adler32
    message["file-hash"] = b64encode(file_md5.digest())
    message["source-node-name"] = _local_node_name
    message["handoff-node-name"] = None
    message[meta_key] = meta_value

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=content_item
    )
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def xxxtest_destroy_tombstone(self):
    """test destroying a key that has already been destroyed"""
    file_size = 10 * 64 * 1024
    content_item = random_string(file_size)
    message_id = uuid.uuid1().hex
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    archive_timestamp = create_timestamp()
    # the two destroys happen at strictly increasing timestamps
    destroy_1_timestamp = archive_timestamp + timedelta(seconds=1)
    destroy_2_timestamp = destroy_1_timestamp + timedelta(seconds=1)
    segment_num = 2

    file_adler32 = zlib.adler32(content_item)
    file_md5 = hashlib.md5(content_item)

    message = dict()
    message["message-type"] = "archive-key-entire"
    message["message-id"] = message_id
    message["priority"] = archive_priority
    message["collection-id"] = collection_id
    message["key"] = key
    message["timestamp-repr"] = repr(archive_timestamp)
    message["segment-num"] = segment_num
    message["segment-size"] = file_size
    message["segment-adler32"] = file_adler32
    message["segment-md5-digest"] = b64encode(file_md5.digest())
    message["file-size"] = file_size
    message["file-adler32"] = file_adler32
    message["file-hash"] = b64encode(file_md5.digest())
    message["handoff-node-name"] = None

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=content_item
    )
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # destroying twice must succeed both times
    for destroy_timestamp in [destroy_1_timestamp, destroy_2_timestamp]:
        reply = self._destroy(
            collection_id, key, destroy_timestamp, segment_num
        )
        self.assertEqual(reply["result"], "success", reply["error-message"])
def xxxtest_archive_key_entire_with_meta(self):
    """test archiving a key in a single message, including meta data"""
    file_size = 10 * 64 * 1024
    payload = random_string(file_size)
    user_request_id = uuid.uuid1().hex
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2

    # meta data rides along under a prefixed key
    meta_key = "".join([nimbus_meta_prefix, "test_key"])
    meta_value = "pork"

    payload_adler32 = zlib.adler32(payload)
    payload_md5 = hashlib.md5(payload)
    encoded_md5 = b64encode(payload_md5.digest())

    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()

    request = {
        "message-type": "archive-key-entire",
        "priority": archive_priority,
        "user-request-id": user_request_id,
        "collection-id": collection_id,
        "key": key,
        "unified-id": unified_id,
        "timestamp-repr": repr(timestamp),
        "conjoined-part": 0,
        "segment-num": segment_num,
        "segment-size": file_size,
        "zfec-padding-size": 4,
        "segment-adler32": payload_adler32,
        "segment-md5-digest": encoded_md5,
        "file-size": file_size,
        "file-adler32": payload_adler32,
        "file-hash": encoded_md5,
        "source-node-name": _local_node_name,
        "handoff-node-name": None,
        meta_key: meta_value,
    }

    reply = send_request_and_get_reply(_local_node_name,
                                       _data_writer_address,
                                       _local_node_name,
                                       _client_address,
                                       request,
                                       data=payload)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def xxxtest_destroy_tombstone(self):
    """test destroying a key that has already been destroyed"""
    file_size = 10 * 64 * 1024
    payload = random_string(file_size)
    message_id = uuid.uuid1().hex
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    archive_timestamp = create_timestamp()
    # destroy timestamps follow the archive at one second intervals
    destroy_1_timestamp = archive_timestamp + timedelta(seconds=1)
    destroy_2_timestamp = destroy_1_timestamp + timedelta(seconds=1)
    segment_num = 2

    payload_adler32 = zlib.adler32(payload)
    payload_md5 = hashlib.md5(payload)
    encoded_md5 = b64encode(payload_md5.digest())

    request = {
        "message-type": "archive-key-entire",
        "message-id": message_id,
        "priority": archive_priority,
        "collection-id": collection_id,
        "key": key,
        "timestamp-repr": repr(archive_timestamp),
        "segment-num": segment_num,
        "segment-size": file_size,
        "segment-adler32": payload_adler32,
        "segment-md5-digest": encoded_md5,
        "file-size": file_size,
        "file-adler32": payload_adler32,
        "file-hash": encoded_md5,
        "handoff-node-name": None,
    }
    reply = send_request_and_get_reply(_local_node_name,
                                       _data_writer_address,
                                       _local_node_name,
                                       _client_address,
                                       request,
                                       data=payload)
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # first destroy
    reply = self._destroy(collection_id,
                          key,
                          destroy_1_timestamp,
                          segment_num)
    self.assertEqual(reply["result"], "success", reply["error-message"])

    # second destroy of the same key must also succeed
    reply = self._destroy(collection_id,
                          key,
                          destroy_2_timestamp,
                          segment_num)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def test_archive_key_entire(self):
    """test archiving all data for a key in a single message"""
    file_size = 10 * 64 * 1024
    content_item = random_string(file_size)
    message_id = uuid.uuid1().hex
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2

    file_adler32 = zlib.adler32(content_item)
    file_md5 = hashlib.md5(content_item)
    # the whole file is one segment, so segment and file digests agree
    encoded_md5 = b64encode(file_md5.digest())

    message = {
        "message-type": "archive-key-entire",
        "message-id": message_id,
        "priority": archive_priority,
        "collection-id": collection_id,
        "key": key,
        "timestamp-repr": repr(timestamp),
        "segment-num": segment_num,
        "segment-size": file_size,
        "segment-adler32": file_adler32,
        "segment-md5-digest": encoded_md5,
        "file-size": file_size,
        "file-adler32": file_adler32,
        "file-hash": encoded_md5,
        "handoff-node-name": None,
    }

    reply = send_request_and_get_reply(_local_node_name,
                                       _data_writer_address,
                                       _local_node_name,
                                       _client_address,
                                       message,
                                       data=content_item)
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")
def xxxtest_large_archive(self):
    """
    test archiving a file that needs more than one message.
    For example, a 10 Mb file: each node would get 10 120kb zefec shares.
    """
    slice_size = 1024 * 1024
    slice_count = 10
    total_size = slice_size * slice_count
    test_data = random_string(total_size)
    user_request_id = uuid.uuid1().hex
    collection_id = 1001
    archive_priority = create_priority()
    timestamp = create_timestamp()
    key = self._key_generator.next()
    segment_num = 4
    sequence_num = 0
    file_adler32 = zlib.adler32(test_data)
    file_md5 = hashlib.md5(test_data)

    # the first slice goes out as "archive-key-start"
    slice_start = 0
    slice_end = slice_size
    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()
    message = {
        "message-type" : "archive-key-start",
        "priority" : archive_priority,
        "user-request-id" : user_request_id,
        "collection-id" : collection_id,
        "key" : key,
        "unified-id" : unified_id,
        "timestamp-repr" : repr(timestamp),
        "conjoined-part" : 0,
        "segment-num" : segment_num,
        "segment-size" : len(test_data[slice_start:slice_end]),
        "zfec-padding-size" : 4,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "segment-adler32" : segment_adler32,
        "sequence-num" : sequence_num,
        "source-node-name" : _local_node_name,
        "handoff-node-name" : None,
    }
    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end]
    )
    self.assertEqual(reply["message-type"], "archive-key-start-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])

    # middle slices go out as "archive-key-next"
    for _ in range(slice_count-2):
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size
        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
        # NOTE(review): message_id is assigned but never placed in the
        # message below — apparently leftover from an older message format
        message_id = uuid.uuid1().hex
        message = {
            "message-type" : "archive-key-next",
            "priority" : archive_priority,
            "user-request-id" : user_request_id,
            "collection-id" : collection_id,
            "key" : key,
            "unified-id" : unified_id,
            "timestamp-repr" : repr(timestamp),
            "conjoined-part" : 0,
            "segment-num" : segment_num,
            "segment-size" : len(test_data[slice_start:slice_end]),
            "zfec-padding-size" : 4,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "segment-adler32" : segment_adler32,
            "sequence-num" : sequence_num,
            "source-node-name" : _local_node_name,
            "handoff-node-name" : None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end]
        )
        self.assertEqual(reply["message-type"], "archive-key-next-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(reply["result"], "success", reply["error-message"])

    # the last slice goes out as "archive-key-final" and carries the
    # whole-file size, adler32 and md5
    sequence_num += 1
    slice_start += slice_size
    slice_end += slice_size
    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
    message = {
        "message-type" : "archive-key-final",
        "priority" : archive_priority,
        "user-request-id" : user_request_id,
        "collection-id" : collection_id,
        "key" : key,
        "unified-id" : unified_id,
        "timestamp-repr" : repr(timestamp),
        "conjoined-part" : 0,
        "segment-num" : segment_num,
        "segment-size" : len(test_data[slice_start:slice_end]),
        "zfec-padding-size" : 4,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "segment-adler32" : segment_adler32,
        "sequence-num" : sequence_num,
        "file-size" : total_size,
        "file-adler32" : file_adler32,
        "file-hash" : b64encode(file_md5.digest()),
        "source-node-name" : _local_node_name,
        "handoff-node-name" : None,
    }
    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end]
    )
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def test_retrieve_small_content(self):
    """test retrieving content that fits in a single message"""
    file_size = 10 * 64 * 1024
    file_content = random_string(file_size)
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2
    file_adler32 = zlib.adler32(file_content)
    file_md5 = hashlib.md5(file_content)

    # first archive the content so there is something to retrieve
    message_id = uuid.uuid1().hex
    message = {
        "message-type" : "archive-key-entire",
        "message-id" : message_id,
        "priority" : archive_priority,
        "collection-id" : collection_id,
        "key" : key,
        "conjoined-unified-id" : None,
        "conjoined-part" : 0,
        "timestamp-repr" : repr(timestamp),
        "segment-num" : segment_num,
        "segment-size" : file_size,
        "segment-adler32" : file_adler32,
        "segment-md5-digest" : b64encode(file_md5.digest()),
        "file-size" : file_size,
        "file-adler32" : file_adler32,
        "file-hash" : b64encode(file_md5.digest()),
        "handoff-node-name" : None,
    }
    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=file_content
    )
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # get file info from the local database
    _conjoined_row, segment_rows = current_status_of_key(
        self._database_connection, collection_id, key
    )
    self.assertEqual(len(segment_rows), 1)

    # retrieve: small content must come back complete in one reply
    message_id = uuid.uuid1().hex
    message = {
        "message-type" : "retrieve-key-start",
        "message-id" : message_id,
        "collection-id" : collection_id,
        "key" : key,
        "timestamp-repr" : repr(timestamp),
        "conjoined-unified-id" : None,
        "conjoined-part" : 0,
        "segment-num" : segment_num
    }
    reply, data = send_request_and_get_reply_and_data(
        _local_node_name,
        _data_reader_address,
        _local_node_name,
        _client_address,
        message
    )
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "retrieve-key-reply")
    self.assertEqual(reply["completed"], True)
    # the retrieved bytes must match what was archived
    self.assertEqual(len(data), len(file_content))
    self.assertEqual(data, file_content)
def _generate_key():
    """generate a key string: random bytes, base64-encoded, '=' padding removed"""
    encoded = base64.b64encode(random_string(32))
    return encoded.rstrip('=')
def test_retrieve_large_content(self):
    """test retrieving content that spans multiple messages"""
    slice_size = 1024 * 1024
    slice_count = 10
    total_size = slice_size * slice_count
    test_data = random_string(total_size)
    collection_id = 1001
    archive_priority = create_priority()
    timestamp = create_timestamp()
    key = self._key_generator.next()
    segment_num = 4
    sequence_num = 0
    file_adler32 = zlib.adler32(test_data)
    file_md5 = hashlib.md5(test_data)

    # archive the first slice with "archive-key-start"
    slice_start = 0
    slice_end = slice_size
    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
    message_id = uuid.uuid1().hex
    message = {
        "message-type": "archive-key-start",
        "message-id": message_id,
        "priority": archive_priority,
        "collection-id": collection_id,
        "key": key,
        "conjoined-unified-id": None,
        "conjoined-part": 0,
        "timestamp-repr": repr(timestamp),
        "segment-num": segment_num,
        "segment-size": len(test_data[slice_start:slice_end]),
        "segment-adler32": segment_adler32,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "sequence-num": sequence_num,
    }
    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end])
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-start-reply")
    self.assertEqual(reply["result"], "success")

    # middle slices go out as "archive-key-next"
    for _ in range(slice_count - 2):
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size
        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
        message_id = uuid.uuid1().hex
        message = {
            "message-type": "archive-key-next",
            "message-id": message_id,
            "priority": archive_priority,
            "collection-id": collection_id,
            "key": key,
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "timestamp-repr": repr(timestamp),
            "segment-num": segment_num,
            "segment-size": len(test_data[slice_start:slice_end]),
            "segment-adler32": segment_adler32,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "sequence-num": sequence_num,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-next-reply")
        self.assertEqual(reply["result"], "success")

    # the last slice is "archive-key-final" and must land exactly at the end
    sequence_num += 1
    slice_start += slice_size
    slice_end += slice_size
    self.assertEqual(slice_end, total_size)
    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
    message_id = uuid.uuid1().hex
    message = {
        "message-type": "archive-key-final",
        "message-id": message_id,
        "priority": archive_priority,
        "collection-id": collection_id,
        "key": key,
        "conjoined-unified-id": None,
        "conjoined-part": 0,
        "timestamp-repr": repr(timestamp),
        "segment-num": segment_num,
        "segment-size": len(test_data[slice_start:slice_end]),
        "segment-adler32": segment_adler32,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "sequence-num": sequence_num,
        "file-size": total_size,
        "file-adler32": file_adler32,
        "file-hash": b64encode(file_md5.digest()),
        "handoff-node-name": None,
    }
    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end])
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # get file info from the local database
    _conjoined_row, segment_rows = current_status_of_key(
        self._database_connection, collection_id, key)
    self.assertEqual(len(segment_rows), 1)

    # retrieve the content back one message at a time
    retrieved_data_list = list()
    message_id = uuid.uuid1().hex
    message = {
        "message-type": "retrieve-key-start",
        "message-id": message_id,
        "collection-id": collection_id,
        "key": key,
        "timestamp-repr": repr(timestamp),
        "conjoined-unified-id": None,
        "conjoined-part": 0,
        "segment-num": segment_num
    }
    reply, data = send_request_and_get_reply_and_data(
        _local_node_name,
        _data_reader_address,
        _local_node_name,
        _client_address,
        message)
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "retrieve-key-reply")
    # large content cannot be complete after one reply
    self.assertEqual(reply["completed"], False)
    # progress output (python 2 print statement)
    print "sequence-num =", reply["sequence-num"]
    retrieved_data_list.append(data)

    # keep sending "retrieve-key-next" until the reply says completed
    while True:
        message_id = uuid.uuid1().hex
        message = {
            "message-type": "retrieve-key-next",
            "message-id": message_id,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": repr(timestamp),
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "segment-num": segment_num
        }
        reply, data = send_request_and_get_reply_and_data(
            _local_node_name,
            _data_reader_address,
            _local_node_name,
            _client_address,
            message)
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "retrieve-key-reply")
        retrieved_data_list.append(data)
        print "sequence-num =", reply["sequence-num"]
        if reply["completed"]:
            break

    # the reassembled content must match what was archived
    retrieved_data = "".join(retrieved_data_list)
    self.assertEqual(len(retrieved_data), len(test_data))
    self.assertEqual(retrieved_data, test_data)
def test_retrieve_small_content(self):
    """test retrieving content that fits in a single message"""
    file_size = 10 * 64 * 1024
    file_content = random_string(file_size)
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2
    file_adler32 = zlib.adler32(file_content)
    file_md5 = hashlib.md5(file_content)
    message_id = uuid.uuid1().hex

    # archive the content first so there is something to retrieve
    message = {
        "message-type": "archive-key-entire",
        "message-id": message_id,
        "priority": archive_priority,
        "collection-id": collection_id,
        "key": key,
        "conjoined-unified-id": None,
        "conjoined-part": 0,
        "timestamp-repr": repr(timestamp),
        "segment-num": segment_num,
        "segment-size": file_size,
        "segment-adler32": file_adler32,
        "segment-md5-digest": b64encode(file_md5.digest()),
        "file-size": file_size,
        "file-adler32": file_adler32,
        "file-hash": b64encode(file_md5.digest()),
        "handoff-node-name": None,
    }
    reply = send_request_and_get_reply(_local_node_name,
                                       _data_writer_address,
                                       _local_node_name,
                                       _client_address,
                                       message,
                                       data=file_content)
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # get file info from the local database
    _conjoined_row, segment_rows = current_status_of_key(
        self._database_connection, collection_id, key)
    self.assertEqual(len(segment_rows), 1)

    # retrieve: small content must come back complete in one reply
    message_id = uuid.uuid1().hex
    message = {
        "message-type": "retrieve-key-start",
        "message-id": message_id,
        "collection-id": collection_id,
        "key": key,
        "timestamp-repr": repr(timestamp),
        "conjoined-unified-id": None,
        "conjoined-part": 0,
        "segment-num": segment_num
    }
    reply, data = send_request_and_get_reply_and_data(
        _local_node_name,
        _data_reader_address,
        _local_node_name,
        _client_address,
        message)
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "retrieve-key-reply")
    self.assertEqual(reply["completed"], True)
    # the retrieved bytes must match what was archived
    self.assertEqual(len(data), len(file_content))
    self.assertEqual(data, file_content)
def xxxtest_large_archive(self):
    """
    test archiving a file that needs more than one message.
    For example, a 10 Mb file: each node would get 10 120kb zefec shares.
    """
    slice_size = 1024 * 1024
    slice_count = 10
    total_size = slice_size * slice_count
    test_data = random_string(total_size)
    user_request_id = uuid.uuid1().hex
    collection_id = 1001
    archive_priority = create_priority()
    timestamp = create_timestamp()
    key = self._key_generator.next()
    segment_num = 4
    sequence_num = 0
    file_adler32 = zlib.adler32(test_data)
    file_md5 = hashlib.md5(test_data)

    # first slice goes out as "archive-key-start"
    slice_start = 0
    slice_end = slice_size
    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()
    message = {
        "message-type": "archive-key-start",
        "priority": archive_priority,
        "user-request-id": user_request_id,
        "collection-id": collection_id,
        "key": key,
        "unified-id": unified_id,
        "timestamp-repr": repr(timestamp),
        "conjoined-part": 0,
        "segment-num": segment_num,
        "segment-size": len(test_data[slice_start:slice_end]),
        "zfec-padding-size": 4,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "segment-adler32": segment_adler32,
        "sequence-num": sequence_num,
        "source-node-name": _local_node_name,
        "handoff-node-name": None,
    }
    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end])
    self.assertEqual(reply["message-type"], "archive-key-start-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])

    # middle slices go out as "archive-key-next"
    for _ in range(slice_count - 2):
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size
        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
        # NOTE(review): message_id is assigned but never placed in the
        # message below — apparently leftover from an older message format
        message_id = uuid.uuid1().hex
        message = {
            "message-type": "archive-key-next",
            "priority": archive_priority,
            "user-request-id": user_request_id,
            "collection-id": collection_id,
            "key": key,
            "unified-id": unified_id,
            "timestamp-repr": repr(timestamp),
            "conjoined-part": 0,
            "segment-num": segment_num,
            "segment-size": len(test_data[slice_start:slice_end]),
            "zfec-padding-size": 4,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "segment-adler32": segment_adler32,
            "sequence-num": sequence_num,
            "source-node-name": _local_node_name,
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])
        self.assertEqual(reply["message-type"], "archive-key-next-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(reply["result"], "success", reply["error-message"])

    # final slice goes out as "archive-key-final" and carries the
    # whole-file size, adler32 and md5
    sequence_num += 1
    slice_start += slice_size
    slice_end += slice_size
    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
    message = {
        "message-type": "archive-key-final",
        "priority": archive_priority,
        "user-request-id": user_request_id,
        "collection-id": collection_id,
        "key": key,
        "unified-id": unified_id,
        "timestamp-repr": repr(timestamp),
        "conjoined-part": 0,
        "segment-num": segment_num,
        "segment-size": len(test_data[slice_start:slice_end]),
        "zfec-padding-size": 4,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "segment-adler32": segment_adler32,
        "sequence-num": sequence_num,
        "file-size": total_size,
        "file-adler32": file_adler32,
        "file-hash": b64encode(file_md5.digest()),
        "source-node-name": _local_node_name,
        "handoff-node-name": None,
    }
    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end])
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def test_retrieve_large_content(self):
    """test retrieving content that spans multiple messages"""
    slice_size = 1024 * 1024
    slice_count = 10
    total_size = slice_size * slice_count
    test_data = random_string(total_size)
    collection_id = 1001
    archive_priority = create_priority()
    timestamp = create_timestamp()
    key = self._key_generator.next()
    segment_num = 4
    sequence_num = 0
    file_adler32 = zlib.adler32(test_data)
    file_md5 = hashlib.md5(test_data)

    # archive phase: first slice is "archive-key-start"
    slice_start = 0
    slice_end = slice_size
    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
    message_id = uuid.uuid1().hex
    message = {
        "message-type" : "archive-key-start",
        "message-id" : message_id,
        "priority" : archive_priority,
        "collection-id" : collection_id,
        "key" : key,
        "conjoined-unified-id" : None,
        "conjoined-part" : 0,
        "timestamp-repr" : repr(timestamp),
        "segment-num" : segment_num,
        "segment-size" : len(test_data[slice_start:slice_end]),
        "segment-adler32" : segment_adler32,
        "segment-md5-digest" : b64encode(segment_md5.digest()),
        "sequence-num" : sequence_num,
    }
    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end]
    )
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-start-reply")
    self.assertEqual(reply["result"], "success")

    # middle slices are "archive-key-next"
    for _ in range(slice_count-2):
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size
        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
        message_id = uuid.uuid1().hex
        message = {
            "message-type" : "archive-key-next",
            "message-id" : message_id,
            "priority" : archive_priority,
            "collection-id" : collection_id,
            "key" : key,
            "conjoined-unified-id" : None,
            "conjoined-part" : 0,
            "timestamp-repr" : repr(timestamp),
            "segment-num" : segment_num,
            "segment-size" : len(
                test_data[slice_start:slice_end]
            ),
            "segment-adler32" : segment_adler32,
            "segment-md5-digest" : b64encode(segment_md5.digest()),
            "sequence-num" : sequence_num,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end]
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-next-reply")
        self.assertEqual(reply["result"], "success")

    # last slice is "archive-key-final" and must land exactly at the end
    sequence_num += 1
    slice_start += slice_size
    slice_end += slice_size
    self.assertEqual(slice_end, total_size)
    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
    message_id = uuid.uuid1().hex
    message = {
        "message-type" : "archive-key-final",
        "message-id" : message_id,
        "priority" : archive_priority,
        "collection-id" : collection_id,
        "key" : key,
        "conjoined-unified-id" : None,
        "conjoined-part" : 0,
        "timestamp-repr" : repr(timestamp),
        "segment-num" : segment_num,
        "segment-size" : len(test_data[slice_start:slice_end]),
        "segment-adler32" : segment_adler32,
        "segment-md5-digest" : b64encode(segment_md5.digest()),
        "sequence-num" : sequence_num,
        "file-size" : total_size,
        "file-adler32" : file_adler32,
        "file-hash" : b64encode(file_md5.digest()),
        "handoff-node-name" : None,
    }
    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end]
    )
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # get file info from the local database
    _conjoined_row, segment_rows = current_status_of_key(
        self._database_connection, collection_id, key
    )
    self.assertEqual(len(segment_rows), 1)

    # retrieve phase: pull the content back one message at a time
    retrieved_data_list = list()
    message_id = uuid.uuid1().hex
    message = {
        "message-type" : "retrieve-key-start",
        "message-id" : message_id,
        "collection-id" : collection_id,
        "key" : key,
        "timestamp-repr" : repr(timestamp),
        "conjoined-unified-id" : None,
        "conjoined-part" : 0,
        "segment-num" : segment_num
    }
    reply, data = send_request_and_get_reply_and_data(
        _local_node_name,
        _data_reader_address,
        _local_node_name,
        _client_address,
        message
    )
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "retrieve-key-reply")
    # large content cannot be complete after one reply
    self.assertEqual(reply["completed"], False)
    # progress output (python 2 print statement)
    print "sequence-num =", reply["sequence-num"]
    retrieved_data_list.append(data)

    # keep sending "retrieve-key-next" until the reply says completed
    while True:
        message_id = uuid.uuid1().hex
        message = {
            "message-type" : "retrieve-key-next",
            "message-id" : message_id,
            "collection-id" : collection_id,
            "key" : key,
            "timestamp-repr" : repr(timestamp),
            "conjoined-unified-id" : None,
            "conjoined-part" : 0,
            "segment-num" : segment_num
        }
        reply, data = send_request_and_get_reply_and_data(
            _local_node_name,
            _data_reader_address,
            _local_node_name,
            _client_address,
            message
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "retrieve-key-reply")
        retrieved_data_list.append(data)
        print "sequence-num =", reply["sequence-num"]
        if reply["completed"]:
            break

    # the reassembled content must match what was archived
    retrieved_data = "".join(retrieved_data_list)
    self.assertEqual(len(retrieved_data), len(test_data))
    self.assertEqual(retrieved_data, test_data)