Example No. 1 — timing a tree-hash calculation
    def calculate_tree_hash(self, bytestring):
        start = time.time()
        calculated = bytes_to_hex(tree_hash(chunk_hashes(bytestring)))
        end = time.time()
        logging.debug("Tree hash calc time for length %s: %s",
                      len(bytestring), end - start)
        return calculated
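All of these examples rely on three helpers from boto.glacier.utils: chunk_hashes, tree_hash and bytes_to_hex (the last is essentially binascii.hexlify). As a rough sketch of what the first two compute (the Glacier tree-hash algorithm: SHA-256 over 1 MiB chunks, combined pairwise into a single root digest), an equivalent implementation might look like this:

import hashlib

_MEGABYTE = 1024 * 1024  # Glacier tree hashes are computed over 1 MiB chunks

def chunk_hashes(bytestring, chunk_size=_MEGABYTE):
    # SHA-256 digest of each chunk; empty input still yields one hash
    chunks = [bytestring[i:i + chunk_size]
              for i in range(0, len(bytestring), chunk_size)] or [b'']
    return [hashlib.sha256(chunk).digest() for chunk in chunks]

def tree_hash(hashes):
    # Combine digests pairwise until a single root digest remains;
    # an odd digest at the end of a level is carried up unchanged.
    hashes = list(hashes)
    while len(hashes) > 1:
        new_level = [hashlib.sha256(hashes[i] + hashes[i + 1]).digest()
                     for i in range(0, len(hashes) - 1, 2)]
        if len(hashes) % 2:
            new_level.append(hashes[-1])
        hashes = new_level
    return hashes[0]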
Example No. 2 — uploading one part of a multipart upload
    def upload_part(self, part_index, part_data):
        """Upload a part to Glacier.

        :param part_index: part number where 0 is the first part
        :param part_data: data to upload corresponding to this part

        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        # Compute the tree hash and linear hash Glacier requires for this part
        part_tree_hash = tree_hash(chunk_hashes(part_data, self.chunk_size))
        self._insert_tree_hash(part_index, part_tree_hash)

        hex_tree_hash = bytes_to_hex(part_tree_hash)
        linear_hash = hashlib.sha256(part_data).hexdigest()
        start = self.part_size * part_index
        content_range = (start,
                         (start + len(part_data)) - 1)
        response = self.vault.layer1.upload_part(self.vault.name,
                                                 self.upload_id,
                                                 linear_hash,
                                                 hex_tree_hash,
                                                 content_range, part_data)
        response.read()
        self._uploaded_size += len(part_data)
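For context, a caller would partition its payload into part-size slices and hand each one to upload_part in order. A hypothetical driver (the upload_all_parts name is an assumption, not part of boto):

def upload_all_parts(writer, payload):
    # Hypothetical driver: slice the payload into writer.part_size pieces
    # and push each one through the upload_part method shown above.
    part_size = writer.part_size
    for offset in range(0, len(payload), part_size):
        writer.upload_part(offset // part_size,
                           payload[offset:offset + part_size])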
Example No. 3 — asserting the expected vault calls in a test
def check_mock_vault_calls(vault, upload_part_calls, data_tree_hashes,
                           data_len):
    vault.layer1.upload_part.assert_has_calls(upload_part_calls,
                                              any_order=True)
    assert_equal(len(upload_part_calls), vault.layer1.upload_part.call_count)

    data_tree_hash = bytes_to_hex(tree_hash(data_tree_hashes))
    vault.layer1.complete_multipart_upload.assert_called_once_with(
        sentinel.vault_name, sentinel.upload_id, data_tree_hash, data_len)
Example No. 4 — completing the multipart upload
    def close(self):
        if self.closed:
            return
        if None in self._tree_hashes:
            raise RuntimeError("Some parts were not uploaded.")
        # Complete the multipart glacier upload
        hex_tree_hash = bytes_to_hex(tree_hash(self._tree_hashes))
        response = self.vault.layer1.complete_multipart_upload(
            self.vault.name, self.upload_id, hex_tree_hash,
            self._uploaded_size)
        self.archive_id = response['ArchiveId']
        self.closed = True
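A plausible end-to-end use of this writer, assuming boto's standard Layer2 and Vault entry points (the vault name and payload are placeholders):

from boto.glacier.layer2 import Layer2

layer2 = Layer2()                        # credentials from boto config/env
vault = layer2.get_vault('myvault')      # 'myvault' is a placeholder name
writer = vault.create_archive_writer(description='nightly backup')
writer.write(b'example archive contents')
writer.close()                           # completes the multipart upload
print(writer.archive_id)                 # populated by close(), as above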
Example No. 5 — concurrent archive upload
    def upload(self, filename, description=None):
        """Concurrently create an archive.

        The part_size value specified when the class was constructed
        will be used *unless* it is smaller than the minimum required
        part size needed for the size of the given file.  In that case,
        the part size used will be the minimum part size required
        to properly upload the given file.

        :type filename: str
        :param filename: The name of the file to upload.

        :type description: str
        :param description: The description of the archive.

        :rtype: str
        :return: The archive id of the newly created archive.

        """
        total_size = os.stat(filename).st_size
        total_parts, part_size = self._calculate_required_part_size(total_size)
        hash_chunks = [None] * total_parts
        worker_queue = Queue()
        result_queue = Queue()
        response = self._api.initiate_multipart_upload(self._vault_name,
                                                       part_size,
                                                       description)
        upload_id = response['UploadId']
        # The basic idea is to add the chunks (the offsets, not the actual
        # contents) to a work queue, start up a thread pool, let the threads
        # crank through the items in the work queue, and then place their
        # results in a result queue which we use to complete the multipart
        # upload.
        self._add_work_items_to_queue(total_parts, worker_queue, part_size)
        self._start_upload_threads(result_queue, upload_id,
                                   worker_queue, filename)
        try:
            self._wait_for_upload_threads(hash_chunks, result_queue,
                                          total_parts)
        except UploadArchiveError:
            log.debug("An error occurred while uploading an archive, "
                      "aborting multipart upload.")
            self._api.abort_multipart_upload(self._vault_name, upload_id)
            raise
        log.debug("Completing upload.")
        response = self._api.complete_multipart_upload(
            self._vault_name, upload_id, bytes_to_hex(tree_hash(hash_chunks)),
            total_size)
        log.debug("Upload finished.")
        return response['ArchiveId']
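Typical usage of this concurrent uploader, assuming boto's documented ConcurrentUploader constructor (the vault and file names are placeholders):

from boto.glacier.layer1 import Layer1
from boto.glacier.concurrent import ConcurrentUploader

api = Layer1()  # credentials resolved from boto config / environment
uploader = ConcurrentUploader(api, 'myvault', part_size=8 * 1024 * 1024)
archive_id = uploader.upload('backup.tar.gz', 'nightly backup')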
Example No. 6 — uploading a single chunk from a worker thread
    def _upload_chunk(self, work):
        part_number, part_size = work
        start_byte = part_number * part_size
        self._fileobj.seek(start_byte)
        contents = self._fileobj.read(part_size)
        linear_hash = hashlib.sha256(contents).hexdigest()
        tree_hash_bytes = tree_hash(chunk_hashes(contents))
        byte_range = (start_byte, start_byte + len(contents) - 1)
        log.debug("Uploading chunk %s of size %s", part_number, part_size)
        response = self._api.upload_part(self._vault_name, self._upload_id,
                                         linear_hash,
                                         bytes_to_hex(tree_hash_bytes),
                                         byte_range, contents)
        # Reading the response allows the connection to be reused.
        response.read()
        return (part_number, tree_hash_bytes)
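The byte_range tuple maps onto the Content-Range header that the Glacier UploadMultipartPart API expects; presumably the layer below formats it roughly as follows (the helper name is hypothetical):

def format_content_range(byte_range):
    # Glacier expects a header such as 'bytes 0-4194303/*'
    start, end = byte_range
    return 'bytes %d-%d/*' % (start, end)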
Example No. 7 — building the expected mock calls for a test
def calculate_mock_vault_calls(data, part_size, chunk_size):
    upload_part_calls = []
    data_tree_hashes = []
    for i, data_part in enumerate(partify(data, part_size)):
        start = i * part_size
        end = start + len(data_part)
        data_part_tree_hash_blob = tree_hash(
            chunk_hashes(data_part, chunk_size))
        data_part_tree_hash = bytes_to_hex(data_part_tree_hash_blob)
        data_part_linear_hash = sha256(data_part).hexdigest()
        upload_part_calls.append(
            call.layer1.upload_part(sentinel.vault_name, sentinel.upload_id,
                                    data_part_linear_hash, data_part_tree_hash,
                                    (start, end - 1), data_part))
        data_tree_hashes.append(data_part_tree_hash_blob)

    return upload_part_calls, data_tree_hashes
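The partify helper is not shown; given how it is used above, it presumably just slices the data into part-size pieces:

def partify(data, part_size):
    # Yield successive part_size slices of data; the last may be shorter.
    for offset in range(0, len(data), part_size):
        yield data[offset:offset + part_size]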
Example No. 8 — downloading and verifying one chunk
    def _download_chunk(self, work):
        """
        Download one chunk of the archive from Glacier and verify its
        tree hash against the one reported by Glacier.

        :param work: tuple of (part_number, part_size)

        :return: tuple of (part_number, part_size, binary tree hash, data)
        """
        part_number, part_size = work
        start_byte = part_number * part_size
        byte_range = (start_byte, start_byte + part_size - 1)
        log.debug("Downloading chunk %s of size %s", part_number, part_size)
        response = self._job.get_output(byte_range)
        data = response.read()
        actual_hash = bytes_to_hex(tree_hash(chunk_hashes(data)))
        if response['TreeHash'] != actual_hash:
            raise TreeHashDoesNotMatchError(
                "Tree hash for part number %s does not match, "
                "expected: %s, got: %s" % (part_number, response['TreeHash'],
                                           actual_hash))
        return (part_number, part_size, binascii.unhexlify(actual_hash), data)
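Because transient corruption surfaces here as TreeHashDoesNotMatchError, a caller could retry a failed chunk a few times before giving up. A hypothetical wrapper (the name and retry policy are assumptions):

def download_chunk_with_retries(downloader, work, attempts=3):
    # Hypothetical wrapper around _download_chunk: retry a chunk whose
    # tree hash fails to verify, re-raising after the final attempt.
    for attempt in range(attempts):
        try:
            return downloader._download_chunk(work)
        except TreeHashDoesNotMatchError:
            if attempt == attempts - 1:
                raise
            log.debug("Tree hash mismatch on part %s, retrying", work[0])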
Example No. 9 — assembling and verifying the downloaded archive
    def _wait_for_download_threads(self, filename, result_queue, total_parts):
        """
        Waits until the result queue holds all of the downloaded parts,
        which indicates that every part download has completed, then
        writes the parts into filename and verifies the final tree hash.

        :param filename: path of the file the archive is written to
        :param result_queue: queue on which worker threads place results
        :param total_parts: total number of parts expected
        """
        hash_chunks = [None] * total_parts
        with open(filename, "wb") as f:
            for _ in range(total_parts):
                result = result_queue.get()
                if isinstance(result, Exception):
                    log.debug(
                        "An error was found in the result queue, "
                        "terminating threads: %s", result)
                    self._shutdown_threads()
                    raise DownloadArchiveError(
                        "An error occurred while downloading "
                        "an archive: %s" % result)
                part_number, part_size, actual_hash, data = result
                hash_chunks[part_number] = actual_hash
                start_byte = part_number * part_size
                f.seek(start_byte)
                f.write(data)
                f.flush()
        final_hash = bytes_to_hex(tree_hash(hash_chunks))
        log.debug(
            "Verifying final tree hash of archive, expecting: %s, "
            "actual: %s", self._job.sha256_treehash, final_hash)
        if self._job.sha256_treehash != final_hash:
            self._shutdown_threads()
            raise TreeHashDoesNotMatchError(
                "Tree hash for entire archive does not match, "
                "expected: %s, got: %s" %
                (self._job.sha256_treehash, final_hash))
        self._shutdown_threads()
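Putting the downloader to work, assuming boto's ConcurrentDownloader constructor and a completed archive-retrieval job:

from boto.glacier.concurrent import ConcurrentDownloader

def restore_archive(job, filename):
    # job: a completed archive-retrieval job, e.g. from vault.get_job(job_id)
    downloader = ConcurrentDownloader(job, part_size=4 * 1024 * 1024)
    downloader.download(filename)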