def cli(filename, output):
    """Calculate the Merkle (tree) hash of a file."""
    hash_value = calculate_tree_hash(filename)
    if output == 'binary':
        hash_value = binascii.unhexlify(hash_value)
    click.echo(hash_value)

def multipart_upload(filename, part_size=PART_SIZE):
    glacier = boto3.resource('glacier', region_name='us-west-2')
    # There's no error if the vault already exists so we don't
    # need to catch any exceptions here.
    vault = glacier.create_vault(vaultName='botocore-integ-test-vault')
    file_size = os.path.getsize(filename)
    # Initiate a multipart upload
    multipart_upload = vault.initiate_multipart_upload(
        archiveDescription='multipart upload',
        partSize=str(part_size))
    try:
        # Upload each part
        for i in range(file_size // part_size + 1):
            range_from = i * part_size
            range_to = min((i + 1) * part_size - 1, file_size - 1)
            body = ReadFileChunk.from_filename(filename, range_from, part_size)
            multipart_upload.upload_part(
                body=body,
                range='bytes %d-%d/*' % (range_from, range_to))
        # Complete the multipart upload transaction
        response = multipart_upload.complete(
            checksum=calculate_tree_hash(open(filename, 'rb')),  # NEEDED
            archiveSize=str(file_size))
        return vault.Archive(response['archiveId'])
    except Exception:
        multipart_upload.abort()
        raise

def glacier_sync_multipart_upload(ctx, version, file_path, file_size):
    # Reference: https://boto3.readthedocs.io/en/latest/reference/services/glacier.html#Glacier.Vault.initiate_multipart_upload
    multipart_upload = ctx.vault.initiate_multipart_upload(
        archiveDescription=version.location['object'],
        partSize=str(GLACIER_PART_SIZE),
    )
    with open(file_path, 'rb') as fp:
        for byte_offset in range(0, file_size, GLACIER_PART_SIZE):
            part = fp.read(GLACIER_PART_SIZE)
            range_header = 'bytes {}-{}/{}'.format(
                byte_offset, byte_offset + len(part) - 1, file_size)
            multipart_upload.upload_part(
                range=range_header,
                body=part,
            )
    # TODO: Ideally this would be computed on upload; it is also a good
    # double check, so we do not incur any off-by-one issues.
    # See https://boto3.readthedocs.io/en/latest/reference/services/glacier.html#Glacier.MultipartUpload.complete
    checksum = calculate_tree_hash(open(file_path, 'rb'))
    response = multipart_upload.complete(
        archiveSize=str(file_size),
        checksum=checksum,
    )
    assert response['checksum'] == checksum
    return response['archiveId']

def add_glacier_checksums(params, **kwargs):
    """Add glacier checksums to the http request.

    This will add two headers to the http request:

        * x-amz-content-sha256
        * x-amz-sha256-tree-hash

    These values will only be added if they are not present
    in the HTTP request.

    """
    request_dict = params
    headers = request_dict['headers']
    body = request_dict['body']
    if isinstance(body, six.binary_type):
        # If the user provided a bytes type instead of a file
        # like object, we temporarily create a BytesIO object
        # so we can use the util functions to calculate the
        # checksums, which assume file like objects.  Note that
        # we're not actually changing the body in the request_dict.
        body = six.BytesIO(body)
    starting_position = body.tell()
    if 'x-amz-content-sha256' not in headers:
        headers['x-amz-content-sha256'] = utils.calculate_sha256(
            body, as_hex=True)
    body.seek(starting_position)
    if 'x-amz-sha256-tree-hash' not in headers:
        headers['x-amz-sha256-tree-hash'] = utils.calculate_tree_hash(body)
    body.seek(starting_position)

def archive_file_to_glacier_multipart(session, vault_name, archive_desc, archive_file):
    """
    Upload a file to an AWS Glacier vault using a multipart upload.

    :param session: boto3 session
    :param vault_name: name of the Glacier vault
    :param archive_desc: archive description
    :param archive_file: path of the file to upload
    :return: the complete() response
    """
    # use a 32 MB chunk size
    chunk_size = 1048576 * 32
    glacier = session.resource("glacier")
    logger.info("Upload %s to glacier vault %s" % (archive_file, vault_name))
    vault = glacier.Vault(account_id="-", name=vault_name)
    multipart_upload = vault.initiate_multipart_upload(
        accountId="-",
        archiveDescription=archive_desc,
        partSize=str(chunk_size))
    # upload_id = multipart_upload.id
    f = open(archive_file, "rb")
    start_range = 0
    for chunk in read_in_chunks(f, chunk_size):
        range_data = "bytes %s-%s/*" % (start_range, f.tell() - 1)
        logger.info("Uploading range %s" % range_data)
        multipart_upload.upload_part(range=range_data, body=chunk)
        start_range = f.tell()
    f.seek(0)
    response = multipart_upload.complete(
        archiveSize=str(start_range),
        checksum=calculate_tree_hash(f))
    f.close()
    # archive_id = response.get('archiveId')
    return response

def finalize_upload(self):
    self.file_handle.seek(0)
    filehash = calculate_tree_hash(self.file_handle)
    self.glacier.complete_multipart_upload(
        vaultName=self.vault,
        uploadId=self.multipart_upload_id,
        archiveSize=str(self.file_size),
        checksum=filehash,
    )

def complete_multipart_upload(self, upload_id, archive):
    size = archive.tell()
    archive.seek(0)
    checksum = calculate_tree_hash(archive)
    response = self.client.complete_multipart_upload(
        vaultName=self.vault_name,
        uploadId=upload_id,
        archiveSize=str(size),
        checksum=checksum)
    return response['archiveId']

def checksum(self):
    """
    Calculate the checksum for the upload, as needed by the Glacier API.

    :rtype: str
    """
    if self._checksum is None:
        self._checksum = calculate_tree_hash(self.data)
        if self.completed:
            self._data = None
    return self._checksum

def finalize(self):
    step = 'finalize'
    total_checksum = calculate_tree_hash(open(self.filepath, 'rb'))
    self.responses[step] = get_client().complete_multipart_upload(
        archiveSize=str(get_file_size(self.filepath)),
        checksum=total_checksum,
        uploadId=self.upload_id,
        vaultName=self.vault_name,
    )
    if self.responses[step].get('checksum', None) == total_checksum:
        self.archive_id = self.responses[step]['archiveId']
        logging.info('multipart upload %s finished with archive ID %s'
                     % (self.filepath, self.archive_id))
    else:
        logging.info('multipart upload %s failed' % self.filepath)

def multipart_upload(account_id, chunk_size, file_name, glacier_vault, out_file):
    file_size = os.path.getsize(file_name)
    parts = math.ceil(file_size / chunk_size)
    client = boto3.client('glacier')
    try:
        # initiate the multipart upload
        upload_obj = client.initiate_multipart_upload(
            accountId=account_id,
            vaultName=glacier_vault,
            archiveDescription=file_name,
            partSize=str(chunk_size))
        print(upload_obj)
        # upload the file in chunks
        with open(file_name, 'rb') as upload:
            for p in range(parts):
                lower = p * chunk_size
                upper = lower + chunk_size - 1
                if upper >= file_size:
                    upper = file_size - 1
                file_part = upload.read(chunk_size)
                up_part = client.upload_multipart_part(
                    accountId=account_id,
                    vaultName=glacier_vault,
                    uploadId=upload_obj['uploadId'],
                    range='bytes {}-{}/*'.format(lower, upper),
                    body=file_part)
        # complete the upload
        checksum = calculate_tree_hash(open(file_name, 'rb'))
        complete_upload = client.complete_multipart_upload(
            accountId=account_id,
            vaultName=glacier_vault,
            uploadId=upload_obj['uploadId'],
            archiveSize=str(file_size),
            checksum=checksum)
    except ClientError as e:
        logging.error(e)
        sys.exit()
    print("complete archiving " + file_name + " in Glacier vault " + glacier_vault)
    print(complete_upload)
    with open(out_file, "w") as file:
        file.write(str(complete_upload))

def upload(self):
    """
    Upload the archive to Amazon Glacier by chunking it.
    """
    self.start_upload()
    while not self.upload_part():
        continue
    # Move to the front of the file to calculate its hash
    self.zip_file.seek(0)
    zip_hash = calculate_tree_hash(self.zip_file)
    response = self.client.complete_multipart_upload(
        vaultName=self.vault_name,
        uploadId=self.upload_id,
        archiveSize=str(self.zip_file_size),
        checksum=zip_hash
    )
    return response

def upload_large_file(vault_name, filepath, description):
    """
    Do a multipart upload to Glacier.

    :param vault_name: name of the Glacier vault
    :param filepath: path of the file to upload
    :param description: archive description
    :return: the complete() response
    """
    glacier = boto3.resource("glacier")
    vault = glacier.Vault(account_id="-", name=vault_name)
    multipart_upload = vault.initiate_multipart_upload(
        accountId="-",
        archiveDescription=description,
        partSize=str(CHUNK_SIZE))
    upload_id = multipart_upload.id
    print("Upload id: {}".format(upload_id))
    with open(filepath, 'rb') as f:
        retrylist = upload_segments(multipart_upload, read_in_chunks(f, CHUNK_SIZE))
        f.seek(0, 2)
        fsize = f.tell()
        while len(retrylist) > 0:
            print("Retrying failed parts")
            # generator expression turns the list into a generator
            retrylist = upload_segments(multipart_upload, (i for i in retrylist))
        print("Finalizing upload {} ...".format(upload_id))
        f.seek(0)
        s256t_hash = calculate_tree_hash(f)
        response = multipart_upload.complete(archiveSize=str(fsize), checksum=s256t_hash)
        print("Hash: {}".format(s256t_hash))
        pprint.pprint(response)
        return response

def upload_archive(client, vault_name, file_name, description, concurrency):
    file_size = os.stat(file_name).st_size
    part_size = calculate_part_size(file_size)
    info('Initiating multipart upload')
    upload_id = client.initiate_multipart_upload(
        vaultName=vault_name,
        partSize=str(part_size),
        archiveDescription=description)['uploadId']
    parts = generate_archive_parts(vault_name, file_name, upload_id, file_size, part_size)
    base_name = os.path.basename(file_name)
    info(f'Uploading {base_name} in {len(parts)} parts')
    with Pool(concurrency) as pool:
        pbar = tqdm(total=file_size, unit="", unit_scale=True, dynamic_ncols=True,
                    bar_format='{percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} ({rate_fmt})')
        for uploaded_size in pool.imap_unordered(upload_archive_part, parts):
            pbar.update(uploaded_size)
        pbar.close()
    info('Verifying checksum')
    checksum = calculate_tree_hash(open(file_name, 'rb'))
    return client.complete_multipart_upload(
        vaultName=vault_name,
        uploadId=upload_id,
        archiveSize=str(file_size),
        checksum=checksum)

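# calculate_part_size() above is defined elsewhere in that project and not shown
# here. A plausible sketch (hypothetical, not the project's code), assuming
# Glacier's documented constraints that a part size must be a power of two
# between 1 MiB and 4 GiB and that a multipart upload may contain at most
# 10,000 parts, might look like this:
def calculate_part_size(file_size, max_parts=10000):
    part_size = 1024 * 1024  # start at the 1 MiB minimum
    # Double the part size until the part count fits within the limit,
    # capped at the 4 GiB maximum.
    while part_size * max_parts < file_size and part_size < 4 * 1024 * 1024 * 1024:
        part_size *= 2
    return part_size
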
def multipart_upload(self, path_to_file, part_size=4, description=""):
    """Upload a file in multiple parts.

    Args:
        path_to_file (str): Path to the file.
        part_size (int, optional): Size of the multipart parts.
            Defaults to 4 megabytes.
        description (str, optional): Description of what is uploaded.
    """
    if self.validator.preupload_checks(path_to_file, part_size) and self._vault_exists(self.vault_name):
        total_size = get_file_size(path_to_file)
        part_size_bytes = get_allowed_sizes().get(str(part_size))
        parts = get_needed_parts(path_to_file, part_size_bytes, total_size)
        parts = add_byte_ranges(parts)
        response = self._initiate_multipart_upload(description, part_size_bytes, total_size)
        if self.validator.is_response_ok(response):
            upload_id = response.get("uploadId")
            upload_success = self._do_multipart_upload(upload_id, path_to_file, parts)
            if upload_success:
                self.logger.info("Calculating tree hash...")
                with open(path_to_file, 'rb') as file_object:
                    total_hash = calculate_tree_hash(file_object)
                completed_response = self._complete_multipart_upload(upload_id, total_size, total_hash)
                if self.validator.is_response_ok(completed_response):
                    self.logger.info("Upload completed.")

def update_local_inventory(local_dir, local_filenames, old_local_inventory):
    """Update the local inventory stored as a json file in the local
    directory. The updated local inventory is returned."""
    archive_list = list()
    old_local_inventory_dict = dictify_inventory(old_local_inventory)
    for f in tqdm(local_filenames):
        if f in old_local_inventory_dict:
            checksum = old_local_inventory_dict[f]
        else:
            with open(os.path.join(local_dir, f), 'rb') as target_file:
                checksum = calculate_tree_hash(target_file)
        archive_list.append({
            "ArchiveDescription": f,
            "CreationDate": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            "SHA256TreeHash": checksum
        })
    res = {"ArchiveList": archive_list}
    return res

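# For reference, the value returned by update_local_inventory() mirrors the
# shape of a Glacier inventory listing. An illustrative example (the file name
# is made up; the hash is borrowed from the 1 MiB test case below):
example_inventory = {
    "ArchiveList": [
        {
            "ArchiveDescription": "photos-2019.tar",
            "CreationDate": "2020-01-31 12:00:00",
            "SHA256TreeHash": "9bc1b2a288b26af7257a36277ae3816a7d4f16e89c1e7e77d0a5c48bad62b360",
        }
    ]
}
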
def test_tree_hash_exactly_one_mb(self):
    one_meg_bytestring = b'a' * (1 * 1024 * 1024)
    one_meg = six.BytesIO(one_meg_bytestring)
    self.assertEqual(
        calculate_tree_hash(one_meg),
        '9bc1b2a288b26af7257a36277ae3816a7d4f16e89c1e7e77d0a5c48bad62b360')

def test_tree_hash_less_than_one_mb(self):
    one_k = six.BytesIO(b'a' * 1024)
    self.assertEqual(
        calculate_tree_hash(one_k),
        '2edc986847e209b4016e141a6dc8716d3207350f416969382d431539bf292e4a')

def test_empty_tree_hash(self):
    self.assertEqual(
        calculate_tree_hash(six.BytesIO(b'')),
        'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855')

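# The tests above pin down the edge cases of the Glacier SHA-256 tree hash.
# For reference, a minimal sketch of that algorithm (an illustration, not
# botocore's implementation; tree_hash is a hypothetical helper): hash the
# body in 1 MiB chunks, then combine adjacent digests pairwise until a single
# root digest remains.
import hashlib

MEGABYTE = 1024 * 1024


def tree_hash(fileobj):
    # Hash the body in 1 MiB chunks.
    chunks = []
    while True:
        data = fileobj.read(MEGABYTE)
        if not data:
            break
        chunks.append(hashlib.sha256(data).digest())
    if not chunks:
        # An empty body reduces to the SHA-256 of the empty string,
        # which is what test_empty_tree_hash above expects.
        return hashlib.sha256(b'').hexdigest()
    # Combine adjacent digests pairwise until only the root digest remains.
    while len(chunks) > 1:
        paired = []
        for i in range(0, len(chunks), 2):
            if i + 1 < len(chunks):
                paired.append(hashlib.sha256(chunks[i] + chunks[i + 1]).digest())
            else:
                # An odd trailing digest is carried up unchanged.
                paired.append(chunks[i])
        chunks = paired
    return chunks[0].hex()
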
def mpUpload(fname, vault, desc, acctid, hrglass):
    '''Initiate a multipart upload, split the file into parts, calculate
    checksums and tree hash values for each, and upload to Glacier in
    parallel according to the number of cores in the machine.'''
    startime = time.time()
    chunkscheme = chunksize(fname)  # chunkscheme[0] = size of chunk
    fullsize = getsize(fname)
    size, partcount = chunkscheme[0], chunkscheme[1]
    # graph = ''
    try:
        if size < 42949672960:  # 40 GiB
            try:
                print('calling aws initiate function')
                try:
                    desc = desc.replace('*', '+')
                except Exception:
                    pass
                mpu = glacier_client.initiate_multipart_upload(
                    vaultName=vault, archiveDescription=desc, partSize=str(size))
            except (botocore.exceptions.ParamValidationError,
                    botocore.exceptions.ClientError):
                print('Multipart Upload operation failed, {} not uploaded to {}'.format(fname, vault))
                return None
            upid = mpu['uploadId']
            print('\nMultipart Upload initiated for {} to {}\n Upload Request ID: {}'.format(fname, vault, upid))
            try:
                print(hrglass)
            except Exception:
                pass
            print('This may take a while, {} is {}, watching it happen is not recommended...\n'.format(fname, size_display(fullsize)))
            print('\nsplitting input file into {} parts\n'.format(partcount))
            presplit = time.time() - startime
            print('time {}'.format(timer(startime, time.time())))
            filist = unxplit(fname, size, PART_PREFIX)
            postplit = time.time() - presplit
            print('time: {}'.format(timer(postplit, time.time())))
            of = len(filist)
            if of != partcount:
                print("of: {} doesn't equal partcount: {}...".format(of, partcount))
                trash(PART_PREFIX)
                sys.exit()
            totrange = 0
            # response = []
            all_params = []
            # iterate over each file part to compile parameters
            print('Compiling parameters for upload segments...')
            prop = 0
            for part in filist:
                part_params = {
                    'xpart': part,
                    'vault': vault,
                    'upid': upid,
                    'acctid': ACCOUNT_ID
                }
                num = filist.index(part) + 1
                bytestring = open(part, 'rb')
                thash = str(core.calculate_tree_hash(bytestring))
                part_params.update({'xthash': thash})
                bytestring.close()
                # size is the closest power of two that is greater than the size of the part
                btrange = totrange + getsize(part)
                prtrange = (totrange, btrange - 1)
                part_params.update({'xrange': prtrange})
                all_params.append(part_params)
                # display progress
                comprop = prop
                prop = (len(all_params) * 100) / of
                if prop != comprop:
                    print(bar(prop), ' {} of {}'.format(num, of))
                # increment how much of the file has now been accounted for
                totrange += size  # range increases by size, which is larger than the size of the last part
            print('Uploading to Amazon Glacier...\n')
            # map part uploads into a parallel scheme using the pool
            ccount = 0
            errcount = 0
            done = round((ccount / of) * 100)  # done refers to the percentage
            pool = Pool(processes=cpu_count())
            for partload in pool.imap_unordered(partfeed, all_params):
                if not partload[2]:  # upload part failed
                    print('failed to upload range {}-{}'.format(partload[0], partload[1]))
                    errcount += 1
                else:  # successful part upload
                    pdone = done
                    ccount += 1
                    done = (ccount / of) * 100
                    if done != pdone:
                        print('byterange {}-{} successfully uploaded'.format(partload[0], partload[1]))
                        print(bar(done))
                        print('{} of {} successfully uploaded'.format(ccount, of))
            pool.close()
            pool.join()
            pool.terminate()
            pool = Pool(processes=cpu_count())
            print('{} failed parts'.format(errcount))
            if errcount != 0:
                # firstpass = time.time() - postplit
                errcount = 0
                uploaded = glacier_client.list_parts(vaultName=vault, uploadId=upid)['Parts']
                # check to see if all parts have uploaded
                while len(uploaded) < len(all_params):
                    print('time: {}'.format(timer(postplit, time.time())))
                    postplit = time.time()
                    errcount = 0
                    print('retrying {} failed parts'.format(errcount))
                    rangit = [i['RangeInBytes'] for i in uploaded]
                    # byte ranges already received by the AWS server
                    checkparts = (tuple(byterange.split('-')) for byterange in rangit)
                    rcheckparts = [(int(ad[0]), int(ad[1])) for ad in checkparts]
                    remains = [left for left in all_params if left['xrange'] not in rcheckparts]
                    for partload in pool.imap(partfeed, remains):
                        if not partload[2]:
                            print('failed to upload range: {}-{}'.format(partload[0], partload[1]))
                            errcount += 1
                        else:
                            ccount += 1
                            print('bytes {}-{} (part {} of {}) successfully uploaded!'.format(partload[0], partload[1], ccount, of))
                            print(bar(round((ccount / of) * 100)))
                    print('{} of {} successfully uploaded'.format(ccount, of))
                    print('{} failed parts'.format(errcount))
                    pool.close()
                    pool.join()
                    uploaded = glacier_client.list_parts(vaultName=vault, uploadId=upid)['Parts']
                pool.terminate()
                pool = Pool(processes=cpu_count())
            # all parts uploaded; compute the full tree hash and complete the upload
            with open(fname, 'rb') as f:
                full_tree_hash = core.calculate_tree_hash(f)
            completion = glacier_client.complete_multipart_upload(
                vaultName=vault,
                uploadId=upid,
                archiveSize=str(fullsize),
                checksum=full_tree_hash,
                accountId=acctid)
            # trash(PART_PREFIX)
            print('\nMultipart Upload of Archive: {} to Vault {} Completed\n'.format(fname, vault))
            pool.terminate()
            # for safety, * is the marker string for the history update
            output = {
                'FileName': fname,
                'Description': "{}: {} *{}* ".format(
                    completion['ResponseMetadata']['HTTPHeaders']['date'], desc, fname),
                'VaultName': vault,
                'ArchiveId': completion['archiveId'],
                'Size': str(fullsize)
            }
            return output
        else:
            print('Amazon does not support archive files over 40 GB, {} is {}'.format(
                fname, size_display(fullsize)))
            return None
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        name = split(exc_tb.tb_frame.f_code.co_filename)[1]
        print('Python raised the following exception: {} {} {} {}'.format(
            exc_type, e, name, exc_tb.tb_lineno))
        glacier_client.abort_multipart_upload(vaultName=vault, uploadId=upid)
        return None
    finally:
        trash(PART_PREFIX)

def test_tree_hash_offset_of_one_mb_multiple(self):
    offset_four_mb = six.BytesIO(b'a' * (4 * 1024 * 1024) + b'a' * 20)
    self.assertEqual(
        calculate_tree_hash(offset_four_mb),
        '12f3cbd6101b981cde074039f6f728071da8879d6f632de8afc7cdf00661b08f')

def test_tree_hash_multiple_of_one_mb(self):
    four_mb = six.BytesIO(b'a' * (4 * 1024 * 1024))
    self.assertEqual(
        calculate_tree_hash(four_mb),
        '9491cb2ed1d4e7cd53215f4017c23ec4ad21d7050a1e6bb636c4f67e8cddb844')

def upload_archive(transfer_file, vault_name):
    glacier_client = boto3.client('glacier',
                                  region_name='ap-southeast-2',
                                  aws_access_key_id='DUMMY_ID',
                                  aws_secret_access_key='DUMMY_ACCESS')
    total = os.path.getsize(transfer_file)
    size = ceiling_log(total / 32, 2)  # 32 cores on this NCI machine
    init_mp_upl_resp = glacier_client.initiate_multipart_upload(
        vaultName=vault_name,
        archiveDescription='2000_062 cwb waveform data',
        partSize=str(size))
    print(init_mp_upl_resp['uploadId'])
    # write_uploadid(os.path.splitext(transfer_file)[0] + '.id', init_mp_upl_resp['uploadId'])
    '''
    # Running 32 parallel threads with the code below caused an argument mismatch
    # error in the multiprocessing code, so continuing with serial upload for now.
    # Will investigate and try to fix the parallel upload.
    pool = multiprocessing.Pool(multiprocessing.cpu_count())
    tasks = []
    start = 0
    while start < total:
        tasks.append((transfer_file, start, size, init_mp_upl_resp['uploadId']))
        start += size
    results = [pool.apply_async(transfer_part, t) for t in tasks]
    all_uploaded = True
    for i, result in enumerate(results):
        if not result.get():
            print('Part number ', i, ' was not uploaded successfully')
            all_uploaded = False
    '''
    all_uploaded = True
    start = 0
    while start < total:
        if transfer_part(transfer_file, start, size, init_mp_upl_resp['uploadId'], vault_name):
            start += size
        else:
            all_uploaded = False
    if all_uploaded:
        print("All Files Uploaded")
        print("Verifying Checksum...")
        complete_up = glacier_client.complete_multipart_upload(
            vaultName=vault_name,
            uploadId=init_mp_upl_resp['uploadId'],
            archiveSize=str(total),
            checksum=calculate_tree_hash(open(transfer_file, 'rb')))
        print("Upload Completed:", complete_up)
    else:
        print("Upload of archive file:", transfer_file, " failed...")

def main(argv):
    global logger, aws_region, aws_profile, logging, retrieval_tier, sns_topic
    logger.info('Started')
    topic_arn = ''
    args = parse_args()
    if args.loglevel is False:
        logger.info("Using default logging level: INFO")
    else:
        loglevel = args.loglevel
        logger.info("Changing logging level: " + loglevel.upper())
        if loglevel == "debug":
            logging.getLogger().setLevel(logging.DEBUG)
        elif loglevel == "info":
            logging.getLogger().setLevel(logging.INFO)
        elif loglevel == "error":
            logging.getLogger().setLevel(logging.ERROR)
        elif loglevel == "critical":
            logging.getLogger().setLevel(logging.CRITICAL)
    if args.awsprofile is None:
        logger.error("--aws-profile parameter is missing.")
        return 1
    else:
        aws_profile = args.awsprofile
        logger.info("AWS Profile: " + aws_profile)
    if args.region is not None:
        aws_region = args.region
    if args.vault_name is not None:
        vault_name = args.vault_name
        logger.info("Vault Name: " + vault_name)
    else:
        logger.error("--vault-name parameter is missing.")
        return 1
    if args.sns_topic is not None:
        sns_topic = args.sns_topic
    else:
        sns_topic = None
    if args.archive_id is not None:
        archive_id = args.archive_id
        logger.info("Archive Id: " + archive_id)
    else:
        archive_id = None
    if args.retrieval_tier is not None:
        retrieval_tier = args.retrieval_tier
        logger.info("Glacier Retrieval Tier: %s" % retrieval_tier)
    session = boto3.Session(profile_name=aws_profile, region_name=aws_region)
    sts = session.client('sts')
    account_id = sts.get_caller_identity()["Account"]
    if args.bucket_name is None:
        bucket_name = 'glacier-restored-%s-%s' % (account_id, aws_region)
        logger.info("Default Bucket For Restore: " + bucket_name)
    else:
        bucket_name = args.bucket_name
        logger.info("Specified Bucket For Restore: " + bucket_name)
    s3 = session.client('s3')
    glacier = session.client('glacier')
    response = s3.list_buckets()
    if not any(item['Name'] == bucket_name for item in response['Buckets']):
        logger.info("Bucket name %s not found. Creating new bucket in region %s" % (bucket_name, aws_region))
        if aws_region != 'us-east-1':
            response = s3.create_bucket(Bucket=bucket_name,
                                        CreateBucketConfiguration={'LocationConstraint': aws_region})
        else:
            response = s3.create_bucket(Bucket=bucket_name)
        logger.debug(json_print(response))
        waiter = s3.get_waiter('bucket_exists')
        waiter.wait(Bucket=bucket_name)
        logger.info('Bucket creation successful.')
    logger.info("Found bucket name %s" % bucket_name)
    if sns_topic is not None:
        sns = session.client('sns')
        topic_arn = 'arn:aws:sns:%s:%s:%s' % (aws_region, account_id, sns_topic)
        logger.info("Topic Arn: %s" % topic_arn)
        try:
            response = sns.list_subscriptions_by_topic(TopicArn=topic_arn)
            for endpoint in response['Subscriptions']:
                logger.info("Email notify: %s" % endpoint['Endpoint'])
        except ClientError as e:
            logger.error(e.response['Error']['Message'])
            return 1
    if args.archive_id is not None:
        description = 'Retrieve archive on %s' % str(datetime.datetime.now())
        try:
            response = glacier.initiate_job(
                vaultName=vault_name,
                jobParameters={
                    'Type': glacier_job_type,
                    'ArchiveId': archive_id,
                    'Description': description,
                    'SNSTopic': topic_arn,
                    'Tier': retrieval_tier,
                })
            logger.info(json_print(response))
            job_id = response['jobId']
            logger.info("Open python shelve file %s" % shelve_file)
            shelve_db = shelve.open(shelve_file, flag='c', writeback=True)
            if 'job_ids' not in shelve_db:
                shelve_db['job_ids'] = dict()
            logger.info("Archive Id %s" % archive_id)
            logger.info("Storing job id %s with archive id" % job_id)
            job_ids = shelve_db['job_ids']
            job_ids[archive_id] = job_id
            shelve_db['job_ids'] = job_ids
            for job_id in job_ids:
                logger.info("Job Id: %s" % job_id)
            logger.info("Close python shelve file.")
            shelve_db.close()
        except ClientError as e:
            logger.error(e.response['Error']['Message'])
            return 1
    else:
        logger.info("Open shelve db file %s" % shelve_file)
        shelve_db = shelve.open(shelve_file, flag='c', writeback=True)
        logger.info("Checking on glacier retrieval job that is in progress...")
        if 'job_ids' not in shelve_db:
            logger.info("There are no pending jobs.")
            return
        job_ids = shelve_db['job_ids']
        deleted_archive_ids = list()
        for archive_id in job_ids:
            job_id = job_ids[archive_id]
            try:
                response = glacier.describe_job(vaultName=vault_name, jobId=job_id)
                logger.info(json_print(response))
            except ClientError as e:
                logger.info(e.response['Error']['Message'])
                return 1
            if 'StatusCode' in response:
                status_code = response['StatusCode']
            else:
                status_code = ''
            if 'StatusMessage' in response:
                status_message = response['StatusMessage']
            else:
                status_message = ''
            if status_code == 'Succeeded' and status_message == 'Succeeded':
                logger.info("Archive Retrieval Successful")
                response = glacier.get_job_output(vaultName=vault_name, jobId=job_id)
                # logger.info(response)
                archive_description = response['archiveDescription']
                archive_checksum = response['checksum']
                filename = archive_description.split("/")[-1:][0]
                archive_file = workspace + '/' + filename
                logger.info("Saving data to %s " % archive_file)
                file = open(archive_file, "wb")
                file.write(response['body'].read())
                file.flush()
                file.close()
                file_checksum = calculate_tree_hash(open(archive_file, 'rb'))
                logger.info("Archived File checksum: %s" % archive_checksum)
                logger.info("Download File checksum: %s" % file_checksum)
                if archive_checksum != file_checksum:
                    logger.error("Download archive file has a different checksum.")
                    os.remove(archive_file)
                else:
                    logger.info("Download file checksum validation successful.")
                    logger.info("Removing the archive id from shelve db queue.")
                    deleted_archive_ids.append(archive_id)
                    s3key = archive_description
                    s3_upload_file(s3, bucket_name, s3key, archive_file)
            else:
                logger.info("Job Id: %s" % job_id)
                if status_code != '':
                    logger.info("Status Code: %s" % status_code)
                if status_message != '':
                    logger.info("Status Message: %s" % status_message)
        if len(deleted_archive_ids) != 0:
            logger.info("Removing job ids that have been completed.")
            for archive_id in deleted_archive_ids:
                del job_ids[archive_id]
            shelve_db['job_ids'] = job_ids
        logger.info("Closing shelve db for retrieval job.")
        shelve_db.close()
    return

def upload_to_vault(access_key: str, secret_key: str, vault_name: str, file_name: str,
                    log: logging.Logger = None, archive_name: str = None,
                    region: str = 'ap-south-1') -> Optional[dict]:
    """Upload archive to S3 Glacier.

    Uploads files to S3 Glacier for archival.

    Args:
        access_key: AWS access key.
        secret_key: AWS secret key.
        vault_name: Glacier vault to upload to.
        file_name: Local file to upload.
        log: Logger object for logging the status.
        archive_name: Name (default: None) for the uploaded archive.
        region: AWS region (default: ap-south-1).

    Returns:
        Dictionary/Response of the uploaded archived file.
    """
    # You can find the reference code here:
    # https://stackoverflow.com/a/52602270
    try:
        glacier = boto3.client('glacier',
                               aws_access_key_id=access_key,
                               aws_secret_access_key=secret_key,
                               region_name=region)
    except (ClientError, NoCredentialsError):
        log.error('Wrong credentials used to access the AWS account.')
        return None
    else:
        if archive_name is None:
            try:
                archive_name = os.path.basename(file_name)
            except FileNotFoundError:
                log.error('File not found.')
                return None
        upload_chunk = 2 ** 25
        mp_upload = glacier.initiate_multipart_upload
        mp_part = glacier.upload_multipart_part
        cp_upload = glacier.complete_multipart_upload
        multipart_archive_upload = mp_upload(vaultName=vault_name,
                                             archiveDescription=file_name,
                                             partSize=str(upload_chunk))
        file_size = os.path.getsize(file_name)
        multiple_parts = math.ceil(file_size / upload_chunk)
        with open(file_name, 'rb') as upload_archive:
            for idx in range(multiple_parts):
                min_size = idx * upload_chunk
                max_size = min_size + upload_chunk - 1
                if max_size >= file_size:
                    max_size = file_size - 1
                file_part = upload_archive.read(upload_chunk)
                mp_part(vaultName=vault_name,
                        uploadId=multipart_archive_upload['uploadId'],
                        range=f'bytes {min_size}-{max_size}/{file_size}',
                        body=file_part)
        checksum = calculate_tree_hash(open(file_name, 'rb'))
        complete_upload = cp_upload(vaultName=vault_name,
                                    uploadId=multipart_archive_upload['uploadId'],
                                    archiveSize=str(file_size),
                                    checksum=checksum)
        log.info(f'"{file_name}" file archived on AWS S3 Glacier.')
        return complete_upload

                vaultName=vault_name,
                uploadId=upload['uploadId'],
                range=data_range,
                body=data)
        except Exception as e:
            if retry == max_retry:
                print 'Max number of retry.'
                response = client.abort_multipart_upload(
                    vaultName=vault_name,
                    uploadId=upload['uploadId'])
                sys.exit(1)
            print e.message + ' Retry...'
            retry += 1
            continue
        print "OK"
        break

file.seek(0)
file_checksum = calculate_tree_hash(file)
upload_complete = client.complete_multipart_upload(vaultName=vault_name,
                                                   uploadId=upload['uploadId'],
                                                   archiveSize=str(file_len),
                                                   checksum=file_checksum)
file.close()
if upload_complete['ResponseMetadata']['HTTPStatusCode'] == 201:
    print "Upload complete"
    pprint(upload_complete)
else:
    print "Upload failed"
    sys.exit(1)

def main(argv):
    global logger, aws_region, aws_profile, logging
    logger.info('Started')
    args = parse_args()
    if args.loglevel is False:
        logger.info("Using default logging level: INFO")
    else:
        loglevel = args.loglevel
        logger.info("Changing logging level: " + loglevel.upper())
        if loglevel == "debug":
            logging.getLogger().setLevel(logging.DEBUG)
        elif loglevel == "info":
            logging.getLogger().setLevel(logging.INFO)
        elif loglevel == "error":
            logging.getLogger().setLevel(logging.ERROR)
        elif loglevel == "critical":
            logging.getLogger().setLevel(logging.CRITICAL)
    if args.awsprofile is False:
        logger.error("--aws-profile parameter is missing")
        return 1
    else:
        aws_profile = args.awsprofile
        logger.info("AWS Profile: " + aws_profile)
    if args.vault_name is False:
        logger.error("--vault-name parameter is missing")
        return 1
    else:
        vault_name = args.vault_name
        logger.info("Vault Name: " + vault_name)
    if args.bucket_name is False:
        logger.error("--bucket-name parameter is missing")
        return 1
    else:
        bucket_name = args.bucket_name
        logger.info("Bucket Name: " + bucket_name)
    if args.region is False:
        aws_region = 'us-east-1'
    else:
        aws_region = args.region
    logger.info("Number of days: %s" % args.expired_days)
    session = boto3.Session(profile_name=aws_profile)
    s3 = session.client('s3')
    response = s3.list_buckets()
    if not any(item['Name'] == bucket_name for item in response['Buckets']):
        logger.error("Bucket name %s not found" % bucket_name)
        return 1
    logger.info("Found bucket name %s" % bucket_name)
    logger.info("Retrieving a list of files from S3 bucket based on Dynamodb table names")
    list_of_files = []
    dynamodb_client = session.client('dynamodb')
    response = dynamodb_client.list_tables(Limit=100)
    for table_name in response['TableNames']:
        is_truncated = True
        next_continuation_token = None
        while is_truncated:
            if next_continuation_token is None:
                response = s3.list_objects_v2(Bucket=bucket_name, Prefix=table_name, MaxKeys=1000)
            else:
                logger.info("Using next continuation token %s" % next_continuation_token)
                response = s3.list_objects_v2(Bucket=bucket_name, Prefix=table_name, MaxKeys=1000,
                                              ContinuationToken=next_continuation_token)
            for key in response.keys():
                logger.debug("Response: %s" % key)
            is_truncated = response['IsTruncated']
            key_count = response['KeyCount']
            logger.info("There are %s objects with prefix %s in bucket name %s" % (key_count, table_name, bucket_name))
            if 'NextContinuationToken' in response:
                next_continuation_token = response['NextContinuationToken']
            if key_count != 0:
                for item in response['Contents']:
                    # only upload if the archive size is bigger than 0 KB
                    if item['Size'] > 0 and not re.search(r'glacier_archive|logs/|manifest', item['Key'], re.M | re.I):
                        logger.debug("Found backup file: %s" % item['Key'])
                        if item not in list_of_files:
                            list_of_files.append(item)
            logger.debug("Truncated %s" % is_truncated)
            logger.debug(json_print(response['Contents']))
    logger.info("Looping through each file to determine if it needs to be archived.")
    for item in list_of_files:
        logger.debug(item)
        ts = time.time()
        archive_time = datetime.fromtimestamp(ts).strftime('%Y%m%dT%H%M%S')
        s3file_path = item['Key']
        upload_time = item['LastModified']
        file_size = item['Size']
        table_name, backup_time, file_name = s3file_path.split("/")
        bucket_path = table_name + "/" + backup_time
        logger.debug("Bucket Path: %s" % bucket_path)
        logger.debug("File %s | Upload Date %s | Size %s" % (s3file_path, upload_time.strftime("%Y-%m-%d"), file_size))
        upload_date = upload_time.date()
        today = date.today()
        delta = today - upload_date
        number_of_days = delta.days
        year, month, day = str(today.strftime("%Y-%m-%d")).split("-")
        logger.debug("Upload date %s | Today: %s | Delta Days %s" % (upload_date, str(today.strftime("%Y-%m-%d")), number_of_days))
        if number_of_days < int(args.expired_days):
            logger.debug("File %s NOT older than %s days" % (file_name, args.expired_days))
            continue
        logger.info("File %s older than %s days will be archived to glacier vault %s." % (file_name, args.expired_days, vault_name))
        my_file, file_extension = os.path.splitext(file_name)
        if len(file_extension) != 0:
            new_file_name = table_name + "-" + backup_time + file_extension
            logger.info("Rename the backup file %s with table name and extension %s" % (file_name, new_file_name))
        else:
            new_file_name = table_name + "-" + backup_time
            logger.info("Rename the backup file %s with table name %s" % (file_name, new_file_name))
        download_file = workspace + '/' + new_file_name
        manifest_file = bucket_path + '/manifest'
        download_manifest_file = workspace + '/manifest'
        try:
            response = s3_download_file(s3, bucket_name, s3file_path, download_file)
            response = s3_download_file(s3, bucket_name, manifest_file, download_manifest_file)
            logger.info("Download backup file and manifest file status: Successful")
        except S3ResponseError:
            logger.error("Download %s from S3 Failed." % s3file_path)
            logger.error(json_print(response))
            logger.info("Delete downloaded file.")
            os.remove(download_file)
            os.remove(download_manifest_file)
            return 1
        # description = bucket_path + "/" + new_file_name
        response = archive_file_to_glacier_multipart(session, args.vault_name, s3file_path, download_file)
        logger.debug(json_print(response))
        logger.info("Calculate and compare checksum....")
        checksum = calculate_tree_hash(open(download_file, 'rb'))
        archive_checksum = response['checksum']
        logger.info(" Archive checksum: %s" % archive_checksum)
        logger.info("Expected checksum: %s" % checksum)
        if archive_checksum != checksum:
            logger.error("Upload archive %s has a different checksum." % download_file)
            os.remove(download_file)
            os.remove(download_manifest_file)
            return 1
        else:
            logger.info("Archive file %s checksum successful." % download_file)
        response["filename"] = s3file_path
        logger.info("Delete downloaded file")
        logger.info("Saving glacier archive id information to file.")
        manifest_data = open(download_manifest_file).read()
        response['manifest'] = json.loads(manifest_data)
        logger.debug(json_print(response))
        archive_id = response['archiveId']
        filename = response['filename']
        location = response['location']
        glacier_id_file = new_file_name + '-' + str(archive_time) + '.txt'
        with open(workspace + '/' + glacier_id_file, 'w') as outfile:
            json.dump(response, outfile, ensure_ascii=True)
        key_filename = "glacier_archived_id/{0}/{1}/{2}/{3}".format(year, month, day, glacier_id_file)
        with open(workspace + '/' + glacier_id_file) as f:
            object_data = f.read()
        logger.info("Upload archive id to s3 bucket for backup.")
        response = s3.put_object(Body=object_data, Bucket=bucket_name, Key=key_filename,
                                 ServerSideEncryption='AES256', StorageClass='STANDARD',
                                 ContentType='text/plain')
        logger.debug(json_print(response))
        os.remove(workspace + '/' + glacier_id_file)
        os.remove(download_manifest_file)
        os.remove(download_file)
        copy_archived_file(session, bucket_name, s3file_path)
        copy_archived_file(session, bucket_name, manifest_file)
        delete_folder_after_archive(s3, bucket_name, bucket_path)
        store_archive_id(archive_id, filename, checksum, location)
    logger.info('Finished')
    return

def get_treehash(self):
    treehash = ''
    with open(self.filename, 'rb') as f:
        treehash = calculate_tree_hash(f)
    return treehash

#! /usr/bin/env python3
from sys import argv

from botocore.utils import calculate_tree_hash

fname = argv[1]

with open(fname, "rb") as f:
    print(calculate_tree_hash(f))

upload_id = response['uploadId']
try:
    with open(filepath, 'r') as fp:
        fp.seek(0, 2)
        total_size = fp.tell()
        fp.seek(0)
        offsets_start = range(0, total_size, part_size)
        offsets_end = [x + part_size - 1 for x in offsets_start]
        offsets_end[-1] = total_size - 1
        for i, offset_start in enumerate(offsets_start):
            offset_end = offsets_end[i]
            range_str = 'bytes {}-{}/*'.format(offset_start, offset_end)
            data = fp.read(part_size)
            part_tree_hash = calculate_tree_hash(cStringIO.StringIO(data))
            print 'Sending part {} of {} ({})...'.format(
                i, len(offsets_start), range_str)
            print '- SHA256 tree hash (local): {}'.format(part_tree_hash)
            while True:
                try:
                    response = client.upload_multipart_part(
                        vaultName=vault_name,
                        uploadId=upload_id,
                        checksum=part_tree_hash,
                        range=range_str,
                        body=data)
                    break
                except ClientError:
                    print 'Trying again...'
            print '- SHA256 tree hash (remote): {}'.format(