Example #1
    def test_chunks_with_leftovers(self):
        bytestring = b'a' * (2 * 1024 * 1024 + 20)
        chunks = chunk_hashes(bytestring)
        self.assertEqual(len(chunks), 3)
        self.assertEqual(chunks[0], sha256(b'a' * 1024 * 1024).digest())
        self.assertEqual(chunks[1], sha256(b'a' * 1024 * 1024).digest())
        self.assertEqual(chunks[2], sha256(b'a' * 20).digest())
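This test pins down the chunking contract: each full 1 MiB slice gets its own SHA-256 digest and the trailing remainder is hashed on its own. For readers without boto at hand, here is a minimal sketch of a chunk_hashes/tree_hash pair consistent with these tests; it is reconstructed from the behaviour the tests exercise rather than quoted from boto's source, and the empty-input branch is an assumption.

import math
from hashlib import sha256

_ONE_MEGABYTE = 1024 * 1024

def chunk_hashes(bytestring, chunk_size=_ONE_MEGABYTE):
    # SHA-256 digest of each chunk_size slice; the last slice may be shorter.
    chunk_count = int(math.ceil(len(bytestring) / float(chunk_size)))
    if chunk_count == 0:
        # Assumed behaviour for empty input: one hash of the empty string.
        return [sha256(b'').digest()]
    return [sha256(bytestring[i * chunk_size:(i + 1) * chunk_size]).digest()
            for i in range(chunk_count)]

def tree_hash(hashes):
    # Pairwise-combine digests until one root digest remains; an unpaired
    # digest at the end of a level is carried up unchanged.
    hashes = list(hashes)
    while len(hashes) > 1:
        paired = [sha256(hashes[i] + hashes[i + 1]).digest()
                  for i in range(0, len(hashes) - 1, 2)]
        if len(hashes) % 2:
            paired.append(hashes[-1])
        hashes = paired
    return hashes[0]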
Example #2
def resume_file_upload(vault, upload_id, part_size, fobj, part_hash_map,
                       chunk_size=_ONE_MEGABYTE):
    """Resume upload of a file already part-uploaded to Glacier.

    The resumption of an upload where the part-uploaded section is empty is a
    valid degenerate case that this function can handle. In this case,
    part_hash_map should be an empty dict.

    :param vault: boto.glacier.vault.Vault object.
    :param upload_id: existing Glacier upload id of upload being resumed.
    :param part_size: part size of existing upload.
    :param fobj: file object containing local data to resume. This must read
        from the start of the entire upload, not just from the point being
        resumed. Use fobj.seek(0) to achieve this if necessary.
    :param part_hash_map: {part_index: part_tree_hash, ...} of data already
        uploaded. Each supplied part_tree_hash will be verified and the part
        re-uploaded if there is a mismatch.
    :param chunk_size: chunk size of tree hash calculation. This must be
        1 MiB for Amazon.

    """
    uploader = _Uploader(vault, upload_id, part_size, chunk_size)
    for part_index, part_data in enumerate(
            generate_parts_from_fobj(fobj, part_size)):
        part_tree_hash = tree_hash(chunk_hashes(part_data, chunk_size))
        if (part_index not in part_hash_map or
                part_hash_map[part_index] != part_tree_hash):
            uploader.upload_part(part_index, part_data)
        else:
            uploader.skip_part(part_index, part_tree_hash, len(part_data))
    uploader.close()
    return uploader.archive_id
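The docstring above spells out the call contract. Below is a minimal usage sketch rather than code from boto's tests: the region, vault name, upload id, file name and part size are placeholders, and it assumes a Layer2 connection obtained via boto.glacier.connect_to_region. Passing an empty dict for part_hash_map exercises the degenerate case described in the docstring.

import boto.glacier

# Placeholders: region, vault name, upload id and file name are illustrative.
layer2 = boto.glacier.connect_to_region('us-east-1')
my_vault = layer2.get_vault('my-vault')
existing_upload_id = 'EXAMPLE-UPLOAD-ID'

with open('archive.bin', 'rb') as fobj:
    fobj.seek(0)  # read from the start of the entire upload
    archive_id = resume_file_upload(
        my_vault,
        existing_upload_id,
        4 * 1024 * 1024,   # must match the part size of the original upload
        fobj,
        {},                # empty part_hash_map: nothing verified as uploaded yet
    )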
Example #3
    def calculate_tree_hash(self, bytestring):
        start = time.time()
        calculated = bytes_to_hex(tree_hash(chunk_hashes(bytestring)))
        end = time.time()
        logging.debug("Tree hash calc time for length %s: %s", len(bytestring),
                      end - start)
        return calculated
Example #4
    def upload_part(self, part_index, part_data):
        """Upload a part to Glacier.

        :param part_index: part number where 0 is the first part
        :param part_data: data to upload corresponding to this part

        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        # Compute the tree hash and linear hash Glacier requires for this part
        part_tree_hash = tree_hash(chunk_hashes(part_data, self.chunk_size))
        self._insert_tree_hash(part_index, part_tree_hash)

        hex_tree_hash = bytes_to_hex(part_tree_hash)
        linear_hash = hashlib.sha256(part_data).hexdigest()
        start = self.part_size * part_index
        content_range = (start,
                         (start + len(part_data)) - 1)
        response = self.vault.layer1.upload_part(self.vault.name,
                                                 self.upload_id,
                                                 linear_hash,
                                                 hex_tree_hash,
                                                 content_range, part_data)
        response.read()
        self._uploaded_size += len(part_data)
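The part tree hash recorded by _insert_tree_hash above is eventually folded into the archive-level tree hash when the upload is completed. The following is a rough sketch of that closing step, assuming self._tree_hashes is an ordered list with one binary tree hash per part and a Layer1-style complete_multipart_upload call; it is illustrative, not the uploader's verbatim close() method.

    def close(self):
        if self.closed:
            return
        if None in self._tree_hashes:
            raise RuntimeError("Some parts were not uploaded.")
        # Because Glacier part sizes are power-of-two multiples of 1 MiB,
        # the per-part tree hashes are internal nodes of the archive's tree
        # hash, so combining them yields the hash Glacier expects.
        hex_tree_hash = bytes_to_hex(tree_hash(self._tree_hashes))
        response = self.vault.layer1.complete_multipart_upload(
            self.vault.name, self.upload_id, hex_tree_hash,
            self._uploaded_size)
        self.archive_id = response['ArchiveId']
        self.closed = True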
Example #5
    def _upload_chunk(self, work):
        part_number, part_size = work
        start_byte = part_number * part_size
        self._fileobj.seek(start_byte)
        contents = self._fileobj.read(part_size)
        linear_hash = hashlib.sha256(contents).hexdigest()
        tree_hash_bytes = tree_hash(chunk_hashes(contents))
        byte_range = (start_byte, start_byte + len(contents) - 1)
        log.debug("Uploading chunk %s of size %s", part_number, part_size)
        response = self._api.upload_part(self._vault_name, self._upload_id,
                                         linear_hash,
                                         bytes_to_hex(tree_hash_bytes),
                                         byte_range, contents)
        # Reading the response allows the connection to be reused.
        response.read()
        return (part_number, tree_hash_bytes)
Example #6
def calculate_mock_vault_calls(data, part_size, chunk_size):
    upload_part_calls = []
    data_tree_hashes = []
    for i, data_part in enumerate(partify(data, part_size)):
        start = i * part_size
        end = start + len(data_part)
        data_part_tree_hash_blob = tree_hash(
            chunk_hashes(data_part, chunk_size))
        data_part_tree_hash = bytes_to_hex(data_part_tree_hash_blob)
        data_part_linear_hash = sha256(data_part).hexdigest()
        upload_part_calls.append(
            call.layer1.upload_part(sentinel.vault_name, sentinel.upload_id,
                                    data_part_linear_hash, data_part_tree_hash,
                                    (start, end - 1), data_part))
        data_tree_hashes.append(data_part_tree_hash_blob)

    return upload_part_calls, data_tree_hashes
Example #7
    def _download_chunk(self, work):
        """
        Downloads a chunk of archive from Glacier. Saves the data to a temp file
        Returns the part number and temp file location

        :param work:
        """
        part_number, part_size = work
        start_byte = part_number * part_size
        byte_range = (start_byte, start_byte + part_size - 1)
        log.debug("Downloading chunk %s of size %s", part_number, part_size)
        response = self._job.get_output(byte_range)
        data = response.read()
        actual_hash = bytes_to_hex(tree_hash(chunk_hashes(data)))
        if response['TreeHash'] != actual_hash:
            raise TreeHashDoesNotMatchError(
                "Tree hash for part number %s does not match, "
                "expected: %s, got: %s" %
                (part_number, response['TreeHash'], actual_hash))
        return (part_number, part_size, binascii.unhexlify(actual_hash), data)
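Once every chunk has been downloaded, the binary tree hashes returned above can be combined into a whole-archive check. This is a hedged sketch of that step: _verify_download is a hypothetical helper, results is assumed to be the list of tuples returned by _download_chunk, and expected_tree_hash the hex tree hash taken from the job description.

    def _verify_download(self, results, expected_tree_hash):
        # Hypothetical helper, not part of the original class. Assumes each
        # part (except possibly the last) spans a power-of-two number of MiB,
        # so the per-part tree hashes are internal nodes of the archive hash.
        results.sort(key=lambda r: r[0])  # order by part_number
        part_tree_hashes = [tree_hash_bytes
                            for _, _, tree_hash_bytes, _ in results]
        actual = bytes_to_hex(tree_hash(part_tree_hashes))
        if actual != expected_tree_hash:
            raise TreeHashDoesNotMatchError(
                "Archive tree hash does not match, expected: %s, got: %s" %
                (expected_tree_hash, actual))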
Example #8
    def check_no_resume(self, data, resume_set=frozenset()):
        fobj = StringIO(data)
        part_hash_map = {}
        for part_index in resume_set:
            start = self.part_size * part_index
            end = start + self.part_size
            part_data = data[start:end]
            part_hash_map[part_index] = tree_hash(
                chunk_hashes(part_data, self.chunk_size))

        resume_file_upload(self.vault, sentinel.upload_id, self.part_size,
                           fobj, part_hash_map, self.chunk_size)

        upload_part_calls, data_tree_hashes = calculate_mock_vault_calls(
            data, self.part_size, self.chunk_size)
        resume_upload_part_calls = [
            call for part_index, call in enumerate(upload_part_calls)
            if part_index not in resume_set
        ]
        check_mock_vault_calls(self.vault, resume_upload_part_calls,
                               data_tree_hashes, len(data))
Example #9
    def test_chunk_hashes_exact(self):
        chunks = chunk_hashes(b'a' * (2 * 1024 * 1024))
        self.assertEqual(len(chunks), 2)
        self.assertEqual(chunks[0], sha256(b'a' * 1024 * 1024).digest())
Example #10
    def test_less_than_one_chunk(self):
        chunks = chunk_hashes(b'aaaa')
        self.assertEqual(len(chunks), 1)
        self.assertEqual(chunks[0], sha256(b'aaaa').digest())