def test_simple_output_value_file(self):
        """
        write one sequence through an OutputValueFile and check the row
        that _retrieve_value_file_row returns for it
        """
        expected_collection_id = 1001
        expected_segment_id = 42
        payload_size = 1024
        payload = random_string(payload_size)

        value_file = OutputValueFile(self._database_connection,
                                     _repository_path)
        # nothing has been written yet
        self.assertEqual(value_file.size, 0)

        value_file.write_data_for_one_sequence(expected_collection_id,
                                               expected_segment_id,
                                               payload)
        self.assertEqual(value_file.size, payload_size)
        value_file.close()

        # look the row up by the id the value file object holds
        row = _retrieve_value_file_row(self._database_connection,
                                       value_file._value_file_id)
        expected_md5_digest = hashlib.md5(payload).digest()

        self.assertEqual(row.size, payload_size)
        self.assertEqual(str(row.hash), expected_md5_digest)
        self.assertEqual(row.sequence_count, 1)
        # a single sequence: min and max segment id are the same
        self.assertEqual(row.min_segment_id, expected_segment_id)
        self.assertEqual(row.max_segment_id, expected_segment_id)
        self.assertEqual(row.distinct_collection_count, 1)
        self.assertEqual(row.collection_ids, [expected_collection_id])
    def test_handoff_small_content(self):
        """
        archive content small enough for a single 'archive-key-entire'
        message, then prompt on stderr and block on raw_input()
        """
        content_size = 10 * 64 * 1024
        content = random_string(content_size)
        collection_id = 1001
        key = self._key_generator.next()
        priority = create_priority()
        archive_timestamp = create_timestamp()
        segment_num = 5

        content_adler32 = zlib.adler32(content)
        content_md5_digest = hashlib.md5(content).digest()

        archive_message = {
            "message-type": "archive-key-entire",
            "priority": priority,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": repr(archive_timestamp),
            "segment-num": segment_num,
            "file-size": content_size,
            "file-adler32": content_adler32,
            "file-hash": b64encode(content_md5_digest),
            "handoff-node-name": None,
        }

        # run the exchange in a greenlet and join it with a 10 second timeout
        greenlet = gevent.spawn(
            self._send_message_get_reply, archive_message, content
        )
        greenlet.join(timeout=10.0)
        self.assertEqual(greenlet.ready(), True)

        reply = greenlet.value
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        # Python 2 print statements writing to stderr
        for prompt in ("archive successful", "press [Enter] to continue"):
            print >> sys.stderr, prompt
        raw_input()
Exemple #3
0
    def test_handoff_small_content(self):
        """
        send one 'archive-key-entire' message holding all of the content,
        check the reply, then wait for [Enter] on stdin
        """
        size = 10 * 64 * 1024
        body = random_string(size)
        collection_id = 1001
        key = self._key_generator.next()
        priority = create_priority()
        timestamp_repr = repr(create_timestamp())
        segment_num = 5

        body_adler32 = zlib.adler32(body)
        body_hash = b64encode(hashlib.md5(body).digest())

        request = {
            "message-type"      : "archive-key-entire",
            "priority"          : priority,
            "collection-id"     : collection_id,
            "key"               : key,
            "timestamp-repr"    : timestamp_repr,
            "segment-num"       : segment_num,
            "file-size"         : size,
            "file-adler32"      : body_adler32,
            "file-hash"         : body_hash,
            "handoff-node-name" : None,
        }

        sender = gevent.spawn(self._send_message_get_reply, request, body)
        # the greenlet must have finished by the time the join returns
        sender.join(timeout=10.0)
        self.assertEqual(sender.ready(), True)

        answer = sender.value
        self.assertEqual(answer["message-type"], "archive-key-final-reply")
        self.assertEqual(answer["result"], "success")

        # Python 2 print statements: report on stderr, then block on stdin
        print >> sys.stderr, "archive successful"
        print >> sys.stderr, "press [Enter] to continue"
        raw_input()
Exemple #4
0
def _generate_key():
    """
    generate a key string

    Returns the base64 encoding of random_string(32) with the trailing
    '=' padding removed.
    """
    # rstrip returns a new object instead of stripping in place, so its
    # result is what must be returned.
    # b"=" is a str on Python 2 and bytes on Python 3, which is the same
    # type b64encode returns on each version, so one call covers both.
    return base64.b64encode(random_string(32)).rstrip(b"=")
Exemple #5
0
def _generate_key():
    """
    generate a key string

    Returns the base64 encoding of random_string(32) with the trailing
    '=' padding removed.
    """
    encoded = base64.b64encode(random_string(32))
    # str/bytes are immutable: rstrip hands back the stripped copy, it
    # does not modify 'encoded', so the return value has to be used.
    # b"=" matches the type of 'encoded' on both Python 2 (str) and
    # Python 3 (bytes).
    return encoded.rstrip(b"=")
    def test_simple_segment(self):
        """
        write a segment holding a single sequence, then read it back and
        compare size, checksums and content
        """
        collection_id = 1001
        key = "aaa/bbb/ccc"
        timestamp_repr = repr(create_timestamp())
        segment_num = 42
        sequence_num = 0
        payload_size = 1024
        payload = random_string(payload_size)
        payload_adler32 = zlib.adler32(payload)
        payload_md5_digest = hashlib.md5(payload).digest()
        is_tombstone = False

        writer = Writer(self._database_connection, _repository_path)

        # NOTE(review): this Reader is rebound further down without ever
        # being used; it is still constructed here, as before
        reader = Reader(self._database_connection, _repository_path)

        # start -> store the one sequence -> finish
        writer.start_new_segment(
            collection_id, key, timestamp_repr, segment_num
        )
        writer.store_sequence(
            collection_id,
            key,
            timestamp_repr,
            segment_num,
            sequence_num,
            payload
        )
        writer.finish_new_segment(
            collection_id,
            key,
            timestamp_repr,
            segment_num,
            payload_size,
            payload_adler32,
            payload_md5_digest,
            is_tombstone,
            handoff_node_id=None,
        )
        writer.close()

        stored = most_recent_timestamp_for_key(
            self._database_connection, collection_id, key
        )

        self.assertEqual(stored.file_size, payload_size)
        self.assertEqual(stored.file_adler32, payload_adler32)
        self.assertEqual(str(stored.file_hash), payload_md5_digest)
        self.assertEqual(stored.file_tombstone, is_tombstone)

        reader = Reader(self._database_connection, _repository_path)
        rows = reader.generate_all_sequence_rows_for_segment(
            collection_id, key, stored.timestamp, stored.segment_num
        )

        # the generator yields the number of sequences first ...
        self.assertEqual(rows.next(), 1)

        # ... and then the data of the only sequence
        retrieved = rows.next()
        self.assertEqual(len(retrieved), len(payload))
        self.assertEqual(retrieved, payload)
    def xxxtest_archive_key_entire_with_meta(self):
        """
        archive a key with one 'archive-key-entire' message that also
        carries a meta data entry
        """
        content_size = 10 * 64 * 1024
        content = random_string(content_size)
        user_request_id = uuid.uuid1().hex
        collection_id = 1001
        key = self._key_generator.next()
        priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 2

        # the meta entry travels as an extra key in the message itself
        meta_key = "".join([nimbus_meta_prefix, "test_key"])
        meta_value = "pork"

        content_adler32 = zlib.adler32(content)
        # the whole file is one segment, so one digest serves both fields
        content_md5_b64 = b64encode(hashlib.md5(content).digest())

        unified_id = UnifiedIDFactory(1).next()

        request = {
            "message-type": "archive-key-entire",
            "priority": priority,
            "user-request-id": user_request_id,
            "collection-id": collection_id,
            "key": key,
            "unified-id": unified_id,
            "timestamp-repr": repr(timestamp),
            "conjoined-part": 0,
            "segment-num": segment_num,
            "segment-size": content_size,
            "zfec-padding-size": 4,
            "segment-adler32": content_adler32,
            "segment-md5-digest": content_md5_b64,
            "file-size": content_size,
            "file-adler32": content_adler32,
            "file-hash": content_md5_b64,
            "source-node-name": _local_node_name,
            "handoff-node-name": None,
            meta_key: meta_value
        }
        reply = send_request_and_get_reply(_local_node_name,
                                           _data_writer_address,
                                           _local_node_name,
                                           _client_address,
                                           request,
                                           data=content)

        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        # the reply's error message is shown if the result is not success
        self.assertEqual(reply["result"], "success", reply["error-message"])
    def xxxtest_destroy_tombstone(self):
        """
        archive a key, then destroy it twice with increasing timestamps;
        both destroy requests must report success
        """
        content_size = 10 * 64 * 1024
        content = random_string(content_size)
        message_id = uuid.uuid1().hex
        collection_id = 1001
        key = self._key_generator.next()
        priority = create_priority()
        archive_timestamp = create_timestamp()
        # each destroy is stamped one second after the previous operation
        destroy_timestamps = (
            archive_timestamp + timedelta(seconds=1),
            archive_timestamp + timedelta(seconds=1) + timedelta(seconds=1),
        )
        segment_num = 2

        content_adler32 = zlib.adler32(content)
        content_md5_b64 = b64encode(hashlib.md5(content).digest())

        request = {
            "message-type": "archive-key-entire",
            "message-id": message_id,
            "priority": priority,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": repr(archive_timestamp),
            "segment-num": segment_num,
            "segment-size": content_size,
            "segment-adler32": content_adler32,
            "segment-md5-digest": content_md5_b64,
            "file-size": content_size,
            "file-adler32": content_adler32,
            "file-hash": content_md5_b64,
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(_local_node_name,
                                           _data_writer_address,
                                           _local_node_name,
                                           _client_address,
                                           request,
                                           data=content)
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        # the second pass destroys a key that is already destroyed
        for destroy_timestamp in destroy_timestamps:
            reply = self._destroy(
                collection_id, key, destroy_timestamp, segment_num
            )
            self.assertEqual(
                reply["result"], "success", reply["error-message"]
            )
Exemple #9
0
    def xxxtest_archive_key_entire_with_meta(self):
        """
        send a single 'archive-key-entire' message that includes one
        meta data key/value pair and check the final reply
        """
        size = 10 * 64 * 1024
        body = random_string(size)
        request_id = uuid.uuid1().hex
        collection_id = 1001
        key = self._key_generator.next()
        priority = create_priority()
        timestamp_repr = repr(create_timestamp())
        segment_num = 2

        meta_key = "".join([nimbus_meta_prefix, "test_key"])
        meta_value = "pork"

        body_adler32 = zlib.adler32(body)
        body_md5 = hashlib.md5(body)

        id_factory = UnifiedIDFactory(1)
        unified_id = id_factory.next()

        message = {
            "message-type"      : "archive-key-entire",
            "priority"          : priority,
            "user-request-id"   : request_id,
            "collection-id"     : collection_id,
            "key"               : key,
            "unified-id"        : unified_id,
            "timestamp-repr"    : timestamp_repr,
            "conjoined-part"    : 0,
            "segment-num"       : segment_num,
            # segment and file fields carry the same values: one segment
            "segment-size"      : size,
            "zfec-padding-size" : 4,
            "segment-adler32"   : body_adler32,
            "segment-md5-digest": b64encode(body_md5.digest()),
            "file-size"         : size,
            "file-adler32"      : body_adler32,
            "file-hash"         : b64encode(body_md5.digest()),
            "source-node-name"  : _local_node_name,
            "handoff-node-name" : None,
            meta_key            : meta_value
        }
        answer = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=body
        )
        self.assertEqual(answer["message-type"], "archive-key-final-reply")
        self.assertEqual(answer["user-request-id"], request_id)
        self.assertEqual(
            answer["result"], "success", answer["error-message"]
        )
Exemple #10
0
    def xxxtest_destroy_tombstone(self):
        """
        destroy the same archived key two times in a row; the second
        destroy hits a key that has already been destroyed
        """
        size = 10 * 64 * 1024
        body = random_string(size)
        message_id = uuid.uuid1().hex
        collection_id = 1001
        key = self._key_generator.next()
        priority = create_priority()
        archived_at = create_timestamp()
        one_second = timedelta(seconds=1)
        first_destroy_at = archived_at + one_second
        second_destroy_at = first_destroy_at + one_second
        segment_num = 2

        body_adler32 = zlib.adler32(body)
        body_md5 = hashlib.md5(body)

        message = {
            "message-type"      : "archive-key-entire",
            "message-id"        : message_id,
            "priority"          : priority,
            "collection-id"     : collection_id,
            "key"               : key,
            "timestamp-repr"    : repr(archived_at),
            "segment-num"       : segment_num,
            "segment-size"      : size,
            "segment-adler32"   : body_adler32,
            "segment-md5-digest": b64encode(body_md5.digest()),
            "file-size"         : size,
            "file-adler32"      : body_adler32,
            "file-hash"         : b64encode(body_md5.digest()),
            "handoff-node-name" : None,
        }
        answer = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=body
        )
        self.assertEqual(answer["message-id"], message_id)
        self.assertEqual(answer["message-type"], "archive-key-final-reply")
        self.assertEqual(answer["result"], "success")

        # first destroy, one second after the archive
        answer = self._destroy(
            collection_id, key, first_destroy_at, segment_num
        )
        self.assertEqual(
            answer["result"], "success", answer["error-message"]
        )

        # second destroy, one second after the first
        answer = self._destroy(
            collection_id, key, second_destroy_at, segment_num
        )
        self.assertEqual(
            answer["result"], "success", answer["error-message"]
        )
    def test_archive_key_entire(self):
        """
        archive everything for one key with a single 'archive-key-entire'
        message and check the final reply
        """
        content_size = 10 * 64 * 1024
        content = random_string(content_size)
        message_id = uuid.uuid1().hex
        collection_id = 1001
        key = self._key_generator.next()
        priority = create_priority()
        timestamp_repr = repr(create_timestamp())
        segment_num = 2

        content_adler32 = zlib.adler32(content)
        # one segment holds the whole file: same digest for both fields
        content_md5_b64 = b64encode(hashlib.md5(content).digest())

        request = {
            "message-type": "archive-key-entire",
            "message-id": message_id,
            "priority": priority,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": timestamp_repr,
            "segment-num": segment_num,
            "segment-size": content_size,
            "segment-adler32": content_adler32,
            "segment-md5-digest": content_md5_b64,
            "file-size": content_size,
            "file-adler32": content_adler32,
            "file-hash": content_md5_b64,
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(_local_node_name,
                                           _data_writer_address,
                                           _local_node_name,
                                           _client_address,
                                           request,
                                           data=content)

        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")
    def xxxtest_large_archive(self):
        """
        test archiving a file that needs more than one message.
        For example, a 10 Mb file: each node would get 10 120kb
        zfec shares.

        The data goes out as one 'archive-key-start' message, a run of
        'archive-key-next' messages and one 'archive-key-final' message.
        Every reply must echo the user-request-id and report success.
        """
        slice_size = 1024 * 1024
        slice_count = 10
        total_size = slice_size * slice_count
        test_data = random_string(total_size)

        user_request_id = uuid.uuid1().hex

        collection_id = 1001
        archive_priority = create_priority()
        timestamp = create_timestamp()
        key = self._key_generator.next()
        segment_num = 4

        file_adler32 = zlib.adler32(test_data)
        file_md5 = hashlib.md5(test_data)

        unified_id_factory = UnifiedIDFactory(1)
        unified_id = unified_id_factory.next()

        def _archive_slice(message_type, sequence_num, extra_fields=None):
            """send the slice for sequence_num and check the reply"""
            slice_start = sequence_num * slice_size
            # take the slice once and reuse it for the size, both
            # checksums and the payload, instead of re-slicing each time
            slice_data = test_data[slice_start:slice_start + slice_size]
            message = {
                "message-type"      : message_type,
                "priority"          : archive_priority,
                "user-request-id"   : user_request_id,
                "collection-id"     : collection_id,
                "key"               : key,
                "unified-id"        : unified_id,
                "timestamp-repr"    : repr(timestamp),
                "conjoined-part"    : 0,
                "segment-num"       : segment_num,
                "segment-size"      : len(slice_data),
                "zfec-padding-size" : 4,
                "segment-md5-digest": b64encode(
                    hashlib.md5(slice_data).digest()
                ),
                "segment-adler32"   : zlib.adler32(slice_data),
                "sequence-num"      : sequence_num,
                "source-node-name"  : _local_node_name,
                "handoff-node-name" : None,
            }
            if extra_fields is not None:
                message.update(extra_fields)

            reply = send_request_and_get_reply(
                _local_node_name,
                _data_writer_address,
                _local_node_name,
                _client_address,
                message,
                data=slice_data
            )
            # each request type is answered by '<request type>-reply'
            self.assertEqual(reply["message-type"], message_type + "-reply")
            self.assertEqual(reply["user-request-id"], user_request_id)
            self.assertEqual(
                reply["result"], "success", reply["error-message"]
            )

        # first slice
        _archive_slice("archive-key-start", 0)

        # every slice between the first and the last
        for sequence_num in range(1, slice_count - 1):
            _archive_slice("archive-key-next", sequence_num)

        # last slice: only the final message carries the whole-file totals
        _archive_slice(
            "archive-key-final",
            slice_count - 1,
            {
                "file-size"         : total_size,
                "file-adler32"      : file_adler32,
                "file-hash"         : b64encode(file_md5.digest()),
            }
        )
    def test_retrieve_small_content(self):
        """
        archive content that fits in a single message, then retrieve it
        with 'retrieve-key-start' and compare it to what was sent
        """
        content_size = 10 * 64 * 1024
        content = random_string(content_size)
        collection_id = 1001
        key = self._key_generator.next()
        priority = create_priority()
        timestamp_repr = repr(create_timestamp())
        segment_num = 2

        content_adler32 = zlib.adler32(content)
        content_md5_b64 = b64encode(hashlib.md5(content).digest())

        # step 1: archive the content through the data writer
        archive_id = uuid.uuid1().hex
        archive_request = {
            "message-type": "archive-key-entire",
            "message-id": archive_id,
            "priority": priority,
            "collection-id": collection_id,
            "key": key,
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "timestamp-repr": timestamp_repr,
            "segment-num": segment_num,
            "segment-size": content_size,
            "segment-adler32": content_adler32,
            "segment-md5-digest": content_md5_b64,
            "file-size": content_size,
            "file-adler32": content_adler32,
            "file-hash": content_md5_b64,
            "handoff-node-name": None,
        }
        archive_reply = send_request_and_get_reply(_local_node_name,
                                                   _data_writer_address,
                                                   _local_node_name,
                                                   _client_address,
                                                   archive_request,
                                                   data=content)
        self.assertEqual(archive_reply["message-id"], archive_id)
        self.assertEqual(
            archive_reply["message-type"], "archive-key-final-reply"
        )
        self.assertEqual(archive_reply["result"], "success")

        # step 2: the local database must show exactly one segment row
        _conjoined_row, segment_rows = current_status_of_key(
            self._database_connection, collection_id, key
        )
        self.assertEqual(len(segment_rows), 1)

        # step 3: ask the data reader for the same key and segment
        retrieve_id = uuid.uuid1().hex
        retrieve_request = {
            "message-type": "retrieve-key-start",
            "message-id": retrieve_id,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": timestamp_repr,
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "segment-num": segment_num
        }
        retrieve_reply, retrieved = send_request_and_get_reply_and_data(
            _local_node_name,
            _data_reader_address,
            _local_node_name,
            _client_address,
            retrieve_request
        )

        self.assertEqual(retrieve_reply["message-id"], retrieve_id)
        self.assertEqual(retrieve_reply["message-type"], "retrieve-key-reply")
        self.assertEqual(retrieve_reply["completed"], True)
        self.assertEqual(len(retrieved), len(content))
        self.assertEqual(retrieved, content)
Exemple #14
0
def _generate_key():
    """return a key string: base64 text of random_string(32), '=' stripped"""
    raw_material = random_string(32)
    encoded = base64.b64encode(raw_material)
    # drop any trailing base64 padding so the key has no '=' characters
    return encoded.rstrip('=')
Exemple #15
0
    def test_retrieve_large_content(self):
        """test retrieving content that is spread over multiple messages"""
        slice_size = 1024 * 1024
        slice_count = 10
        total_size = slice_size * slice_count
        test_data = random_string(total_size)

        collection_id = 1001
        archive_priority = create_priority()
        timestamp = create_timestamp()
        key = self._key_generator.next()
        segment_num = 4

        file_adler32 = zlib.adler32(test_data)
        file_md5 = hashlib.md5(test_data)

        def _archive_slice(message_type, sequence_num, extra_fields=None):
            """send one slice to the data writer and verify the reply"""
            slice_start = sequence_num * slice_size
            chunk = test_data[slice_start:slice_start + slice_size]
            chunk_adler32 = zlib.adler32(chunk)
            chunk_md5 = hashlib.md5(chunk)

            request_id = uuid.uuid1().hex
            request = {
                "message-type": message_type,
                "message-id": request_id,
                "priority": archive_priority,
                "collection-id": collection_id,
                "key": key,
                "conjoined-unified-id": None,
                "conjoined-part": 0,
                "timestamp-repr": repr(timestamp),
                "segment-num": segment_num,
                "segment-size": len(chunk),
                "segment-adler32": chunk_adler32,
                "segment-md5-digest": b64encode(chunk_md5.digest()),
                "sequence-num": sequence_num,
            }
            if extra_fields is not None:
                request.update(extra_fields)

            answer = send_request_and_get_reply(
                _local_node_name, _data_writer_address, _local_node_name,
                _client_address, request, data=chunk)
            self.assertEqual(answer["message-id"], request_id)
            # every archive reply type is the request type plus "-reply"
            self.assertEqual(answer["message-type"], message_type + "-reply")
            self.assertEqual(answer["result"], "success")

        def _retrieve(message_type):
            """send one retrieve request; return the checked (reply, data)"""
            request_id = uuid.uuid1().hex
            request = {
                "message-type": message_type,
                "message-id": request_id,
                "collection-id": collection_id,
                "key": key,
                "timestamp-repr": repr(timestamp),
                "conjoined-unified-id": None,
                "conjoined-part": 0,
                "segment-num": segment_num
            }
            answer, payload = send_request_and_get_reply_and_data(
                _local_node_name, _data_reader_address, _local_node_name,
                _client_address, request)
            self.assertEqual(answer["message-id"], request_id)
            self.assertEqual(answer["message-type"], "retrieve-key-reply")
            return answer, payload

        # first slice opens the archive, the last one closes it,
        # everything in between is a 'next'
        final_sequence_num = slice_count - 1
        _archive_slice("archive-key-start", 0)
        for sequence_num in range(1, final_sequence_num):
            _archive_slice("archive-key-next", sequence_num)

        # the final slice must end exactly at the end of the test data
        self.assertEqual((final_sequence_num + 1) * slice_size, total_size)
        _archive_slice(
            "archive-key-final",
            final_sequence_num,
            {
                "file-size": total_size,
                "file-adler32": file_adler32,
                "file-hash": b64encode(file_md5.digest()),
                "handoff-node-name": None,
            })

        # get file info from the local database
        _conjoined_row, segment_rows = current_status_of_key(
            self._database_connection, collection_id, key)
        self.assertEqual(len(segment_rows), 1)

        # the first reply must not be complete: more slices have to follow
        reply, data = _retrieve("retrieve-key-start")
        self.assertEqual(reply["completed"], False)
        print("sequence-num = %s" % (reply["sequence-num"], ))
        retrieved_data_list = [data]

        while not reply["completed"]:
            reply, data = _retrieve("retrieve-key-next")
            retrieved_data_list.append(data)
            print("sequence-num = %s" % (reply["sequence-num"], ))

        retrieved_data = "".join(retrieved_data_list)
        self.assertEqual(len(retrieved_data), len(test_data))
        self.assertEqual(retrieved_data, test_data)
Exemple #16
0
    def test_retrieve_small_content(self):
        """test retrieving content small enough for a single message"""
        file_size = 10 * 64 * 1024
        file_content = random_string(file_size)
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 2

        file_adler32 = zlib.adler32(file_content)
        # the whole file is one segment, so segment and file share a digest
        encoded_md5 = b64encode(hashlib.md5(file_content).digest())
        timestamp_repr = repr(timestamp)

        # archive the entire file in one request
        archive_id = uuid.uuid1().hex
        archive_request = {
            "message-type": "archive-key-entire",
            "message-id": archive_id,
            "priority": archive_priority,
            "collection-id": collection_id,
            "key": key,
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "timestamp-repr": timestamp_repr,
            "segment-num": segment_num,
            "segment-size": file_size,
            "segment-adler32": file_adler32,
            "segment-md5-digest": encoded_md5,
            "file-size": file_size,
            "file-adler32": file_adler32,
            "file-hash": encoded_md5,
            "handoff-node-name": None,
        }
        archive_reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            archive_request,
            data=file_content)
        self.assertEqual(archive_reply["message-id"], archive_id)
        self.assertEqual(
            archive_reply["message-type"], "archive-key-final-reply")
        self.assertEqual(archive_reply["result"], "success")

        # get file info from the local database
        _conjoined_row, segment_rows = current_status_of_key(
            self._database_connection, collection_id, key)
        self.assertEqual(len(segment_rows), 1)

        # read it back; a single reply must carry all of the content
        retrieve_id = uuid.uuid1().hex
        retrieve_request = {
            "message-type": "retrieve-key-start",
            "message-id": retrieve_id,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": timestamp_repr,
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "segment-num": segment_num
        }
        retrieve_reply, data = send_request_and_get_reply_and_data(
            _local_node_name,
            _data_reader_address,
            _local_node_name,
            _client_address,
            retrieve_request)

        self.assertEqual(retrieve_reply["message-id"], retrieve_id)
        self.assertEqual(retrieve_reply["message-type"], "retrieve-key-reply")
        self.assertEqual(retrieve_reply["completed"], True)
        self.assertEqual(len(data), len(file_content))
        self.assertEqual(data, file_content)
Exemple #17
0
    def xxxtest_large_archive(self):
        """
        test archiving a file that needs more than one message.
        For example, a 10 Mb file: each node would get 10 120kb
        zfec shares.

        The data is sent as one 'archive-key-start' message, a run of
        'archive-key-next' messages and one 'archive-key-final' message;
        every reply must echo the user-request-id and report success.
        """
        slice_size = 1024 * 1024
        slice_count = 10
        total_size = slice_size * slice_count
        test_data = random_string(total_size)

        user_request_id = uuid.uuid1().hex

        collection_id = 1001
        archive_priority = create_priority()
        timestamp = create_timestamp()
        key = self._key_generator.next()
        segment_num = 4
        sequence_num = 0

        # checksums over the whole file, reported in the final message
        file_adler32 = zlib.adler32(test_data)
        file_md5 = hashlib.md5(test_data)

        slice_start = 0
        slice_end = slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        unified_id_factory = UnifiedIDFactory(1)
        unified_id = unified_id_factory.next()

        message = {
            "message-type": "archive-key-start",
            "priority": archive_priority,
            "user-request-id": user_request_id,
            "collection-id": collection_id,
            "key": key,
            "unified-id": unified_id,
            "timestamp-repr": repr(timestamp),
            "conjoined-part": 0,
            "segment-num": segment_num,
            "segment-size": len(test_data[slice_start:slice_end]),
            "zfec-padding-size": 4,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "segment-adler32": segment_adler32,
            "sequence-num": sequence_num,
            "source-node-name": _local_node_name,
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])
        self.assertEqual(reply["message-type"], "archive-key-start-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        # use .get() for the failure message: the msg argument is evaluated
        # even when the assertion passes, so a successful reply without an
        # "error-message" key must not raise KeyError
        self.assertEqual(
            reply["result"], "success", reply.get("error-message"))

        # the first and the last slice get their own message types,
        # so only slice_count - 2 slices go out as 'next'
        for _ in range(slice_count - 2):
            sequence_num += 1
            slice_start += slice_size
            slice_end += slice_size

            segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
            segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

            message = {
                "message-type": "archive-key-next",
                "priority": archive_priority,
                "user-request-id": user_request_id,
                "collection-id": collection_id,
                "key": key,
                "unified-id": unified_id,
                "timestamp-repr": repr(timestamp),
                "conjoined-part": 0,
                "segment-num": segment_num,
                "segment-size": len(test_data[slice_start:slice_end]),
                "zfec-padding-size": 4,
                "segment-md5-digest": b64encode(segment_md5.digest()),
                "segment-adler32": segment_adler32,
                "sequence-num": sequence_num,
                "source-node-name": _local_node_name,
                "handoff-node-name": None,
            }
            reply = send_request_and_get_reply(
                _local_node_name,
                _data_writer_address,
                _local_node_name,
                _client_address,
                message,
                data=test_data[slice_start:slice_end])
            self.assertEqual(reply["message-type"], "archive-key-next-reply")
            self.assertEqual(reply["user-request-id"], user_request_id)
            self.assertEqual(
                reply["result"], "success", reply.get("error-message"))

        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size
        # sanity check: the final slice must end exactly at the end of the data
        self.assertEqual(slice_end, total_size)

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message = {
            "message-type": "archive-key-final",
            "priority": archive_priority,
            "user-request-id": user_request_id,
            "collection-id": collection_id,
            "key": key,
            "unified-id": unified_id,
            "timestamp-repr": repr(timestamp),
            "conjoined-part": 0,
            "segment-num": segment_num,
            "segment-size": len(test_data[slice_start:slice_end]),
            "zfec-padding-size": 4,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "segment-adler32": segment_adler32,
            "sequence-num": sequence_num,
            "file-size": total_size,
            "file-adler32": file_adler32,
            "file-hash": b64encode(file_md5.digest()),
            "source-node-name": _local_node_name,
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])

        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(
            reply["result"], "success", reply.get("error-message"))
    def test_retrieve_large_content(self):
        """test retrieving content that needs multiple messages"""
        slice_size = 1024 * 1024
        slice_count = 10
        total_size = slice_size * slice_count
        test_data = random_string(total_size)

        collection_id = 1001
        archive_priority = create_priority()
        timestamp = create_timestamp()
        key = self._key_generator.next()
        segment_num = 4

        file_adler32 = zlib.adler32(test_data)
        file_md5 = hashlib.md5(test_data)

        # fields that are identical in every archive request for this key
        archive_fields = {
            "priority": archive_priority,
            "collection-id": collection_id,
            "key": key,
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "timestamp-repr": repr(timestamp),
            "segment-num": segment_num,
        }

        # archive the data one slice per message: start, next..., final
        final_sequence_num = slice_count - 1
        for sequence_num in range(slice_count):
            slice_start = sequence_num * slice_size
            slice_end = slice_start + slice_size
            is_final = sequence_num == final_sequence_num

            if sequence_num == 0:
                message_type = "archive-key-start"
                reply_type = "archive-key-start-reply"
            elif is_final:
                message_type = "archive-key-final"
                reply_type = "archive-key-final-reply"
                # the last slice must end exactly at the end of the data
                self.assertEqual(slice_end, total_size)
            else:
                message_type = "archive-key-next"
                reply_type = "archive-key-next-reply"

            segment = test_data[slice_start:slice_end]
            segment_adler32 = zlib.adler32(segment)
            segment_md5 = hashlib.md5(segment)

            message_id = uuid.uuid1().hex
            message = dict(archive_fields)
            message.update({
                "message-type": message_type,
                "message-id": message_id,
                "segment-size": len(segment),
                "segment-adler32": segment_adler32,
                "segment-md5-digest": b64encode(segment_md5.digest()),
                "sequence-num": sequence_num,
            })
            if is_final:
                # only the final message carries the whole-file totals
                message.update({
                    "file-size": total_size,
                    "file-adler32": file_adler32,
                    "file-hash": b64encode(file_md5.digest()),
                    "handoff-node-name": None,
                })

            reply = send_request_and_get_reply(
                _local_node_name, _data_writer_address, _local_node_name,
                _client_address, message, data=segment)
            self.assertEqual(reply["message-id"], message_id)
            self.assertEqual(reply["message-type"], reply_type)
            self.assertEqual(reply["result"], "success")

        # get file info from the local database
        _conjoined_row, segment_rows = current_status_of_key(
            self._database_connection, collection_id, key)
        self.assertEqual(len(segment_rows), 1)

        # read the data back: one 'start' request, then 'next' requests
        # until the reader reports completion
        retrieved_data_list = []
        message_type = "retrieve-key-start"
        while True:
            message_id = uuid.uuid1().hex
            message = {
                "message-type": message_type,
                "message-id": message_id,
                "collection-id": collection_id,
                "key": key,
                "timestamp-repr": repr(timestamp),
                "conjoined-unified-id": None,
                "conjoined-part": 0,
                "segment-num": segment_num
            }

            reply, data = send_request_and_get_reply_and_data(
                _local_node_name, _data_reader_address, _local_node_name,
                _client_address, message)

            self.assertEqual(reply["message-id"], message_id)
            self.assertEqual(reply["message-type"], "retrieve-key-reply")
            if message_type == "retrieve-key-start":
                # content of this size cannot be complete after one reply
                self.assertEqual(reply["completed"], False)
                message_type = "retrieve-key-next"

            print("sequence-num = %s" % (reply["sequence-num"], ))
            retrieved_data_list.append(data)

            if reply["completed"]:
                break

        retrieved_data = "".join(retrieved_data_list)
        self.assertEqual(len(retrieved_data), len(test_data))
        self.assertEqual(retrieved_data, test_data)