Example 1
def upload_asset_to_path_hook(instance, filename=None):
    '''This returns the asset upload path on S3 and computes the asset file multihash

    Args:
        instance: Asset
            Asset instance
        filename: string
            file name of the uploaded asset

    Returns:
        Asset file path to use on S3
    '''
    logger.debug('Start computing asset file %s multihash', filename)
    start = time.time()
    ctx = hashlib.sha256()
    for chunk in instance.file.chunks(settings.UPLOAD_FILE_CHUNK_SIZE):
        ctx.update(chunk)
    mhash = multihash.to_hex_string(multihash.encode(ctx.digest(), 'sha2-256'))
    # set the hash on the storage so it can be used for upload signing; this temporary
    # attribute is then used by storages.S3Storage to set the MetaData.sha256
    setattr(instance.file.storage, '_tmp_sha256', ctx.hexdigest())
    logger.debug(
        'Set uploaded file %s multihash %s to checksum:multihash; computation done in %ss',
        filename, mhash,
        time.time() - start)
    instance.checksum_multihash = mhash
    return get_asset_path(instance.item, instance.name)
Example 2
def get_sha256_multihash(content):
    '''Get the sha2-256 multihash of the bytes content

    Args:
        content: bytes

    Returns:
        sha256 multihash string
    '''
    digest = hashlib.sha256(content).digest()
    return multihash.to_hex_string(multihash.encode(digest, 'sha2-256'))
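For illustration, a minimal usage sketch (the payload bytes are arbitrary): since the multihash code for sha2-256 is 0x12 and the digest length is 0x20 (32 bytes), the returned hex string is simply the plain SHA-256 hexdigest prefixed with "1220".

import hashlib

content = b'hello world'  # arbitrary example payload
mh = get_sha256_multihash(content)
assert mh.startswith('1220')  # 0x12 = sha2-256 code, 0x20 = 32-byte digest length
assert mh == '1220' + hashlib.sha256(content).hexdigest()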
Example 3
def decode(value):
    """
    Decode an encoded content hash into its hex multihash digest.

    :param bytes value: an encoded content hash (CID)

    :return: the multihash digest as a hex string
    :rtype: str
    """

    cid = make_cid(value)
    return multihash.to_hex_string(multihash.decode(cid.multihash).digest)
Example 4
def create_multihash_string(digest, hash_code):
    '''Returns a multihash string from a digest

    Args:
        digest: bytes
        hash_code: string | int
            hash code (sha2-256, md5, ...)

    Returns: string
        multihash string
    '''
    return multihash.to_hex_string(multihash.encode(digest, hash_code))
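A hedged usage sketch pairing the function with a hashlib digest; per py-multihash, the digest must be raw bytes, and the hash code can be given either as the name 'sha2-256' or the numeric code 0x12.

import hashlib

digest = hashlib.sha256(b'some payload').digest()   # raw 32-byte digest
print(create_multihash_string(digest, 'sha2-256'))  # hex multihash, e.g. '1220...'
print(create_multihash_string(digest, 0x12))        # numeric code is accepted as well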
Example 5
def _validate_asset_file_checksum(href, expected_multihash, asset_multihash):
    expected_multihash = multihash.decode(from_hex_string(expected_multihash))

    logger.debug(
        'Validate asset file checksum at %s with multihash %s/%s (from headers), expected %s/%s '
        '(from checksum:multihash attribute)', href,
        to_hex_string(asset_multihash.digest), asset_multihash.name,
        to_hex_string(expected_multihash.digest), expected_multihash.name)

    if asset_multihash.name != expected_multihash.name:
        logger.error(
            'Asset at href %s, with multihash name=%s digest=%s, doesn\'t match the expected '
            'multihash name=%s digest=%s defined in checksum:multihash attribute',
            href, asset_multihash.name, to_hex_string(asset_multihash.digest),
            expected_multihash.name, to_hex_string(expected_multihash.digest))
        raise serializers.ValidationError({
            'href':
            _(f"Asset at href {href} has a {asset_multihash.name} multihash while a "
              f"{expected_multihash.name} multihash is defined in the checksum:multihash "
              "attribute")
        })

    if asset_multihash != expected_multihash:
        logger.error(
            'Asset at href %s, with multihash name=%s digest=%s, doesn\'t match the '
            'checksum:multihash value name=%s digest=%s', href,
            asset_multihash.name, to_hex_string(asset_multihash.digest),
            expected_multihash.name, to_hex_string(expected_multihash.digest))
        raise serializers.ValidationError({
            'href':
            _(f"Asset at href {href} with {asset_multihash.name} hash "
              f"{to_hex_string(asset_multihash.digest)} doesn't match the "
              f"checksum:multihash {to_hex_string(expected_multihash.digest)}")
        })
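For context, a hedged sketch of how the two multihash tuples compared above can be produced; the payload is made up, and in the real flow the asset-side multihash would be decoded the same way from the value sent in the request headers.

import hashlib
from multihash import decode, encode, from_hex_string, to_hex_string

digest = hashlib.sha256(b'asset content').digest()
expected_hex = to_hex_string(encode(digest, 'sha2-256'))  # what checksum:multihash stores
expected = decode(from_hex_string(expected_hex))          # Multihash(name='sha2-256', ...)
assert expected.name == 'sha2-256'
assert expected.digest == digest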
Example 6
def encode(codec, value):
    """
    Encode a content hash.

    :param str codec: a codec of a content hash
    :param str value: a value of a content hash

    :return: the resulting content hash
    :rtype: str
    """

    profile = get_profile(codec)

    value = profile.encode(value)
    value = multicodec.add_prefix(codec, value)
    return multihash.to_hex_string(value)
Example 7
def multihash_checksum_sha256(file_path: Union[str, BytesIO]):
    """Generate the checksum multihash.

    This method follows the spec `multihash <https://github.com/multiformats/multihash>`_.
    We use `sha256`, as computed by ``check_sum``. The multihash spec defines the code `0x12` for `sha256`,
    whose digest must have a length of `0x20` (32 bytes).

    See more in https://github.com/multiformats/py-multihash/blob/master/multihash/constants.py#L4

    Args:
        file_path (str|BytesIO): Path to the file or an in-memory buffer

    Returns:
        The multihash as a hexadecimal string
    """
    sha256 = 0x12
    sha256_length = 0x20

    _hash = _multihash.encode(digest=check_sum(file_path),
                              code=sha256,
                              length=sha256_length)

    return _multihash.to_hex_string(_hash)
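check_sum is a helper from the surrounding project and is not shown here; given that its return value is passed straight to _multihash.encode as a sha256 digest, it presumably returns the raw 32-byte SHA-256 digest of the file. A hedged sketch of such a helper, assuming nothing beyond hashlib:

import hashlib
from io import BytesIO
from typing import Union


def check_sum(file_path: Union[str, BytesIO], chunk_size: int = 65536) -> bytes:
    """Assumed behaviour: raw SHA-256 digest of a file path or in-memory buffer."""
    sha256 = hashlib.sha256()
    stream = open(file_path, 'rb') if isinstance(file_path, str) else file_path
    try:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            sha256.update(chunk)
    finally:
        if isinstance(file_path, str):
            stream.close()
    return sha256.digest()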
Example 8
    def create_multipart_upload(self, key, asset, checksum_multihash):
        '''Create a multi part upload on the backend

        Args:
            key: string
                key on the S3 backend for which we want to create a multipart upload
            asset: Asset
                Asset metadata model associated with the S3 backend key
            checksum_multihash: string
                Checksum multihash (must be sha256) of the future file to be uploaded

        Returns: string
            Upload Id of the created multipart upload
        '''
        sha256 = to_hex_string(parse_multihash(checksum_multihash).digest)
        response = self.call_s3_api(
            self.s3.create_multipart_upload,
            Bucket=settings.AWS_STORAGE_BUCKET_NAME,
            Key=key,
            Metadata={'sha256': sha256},
            CacheControl=', '.join(
                ['public',
                 f'max-age={settings.STORAGE_ASSETS_CACHE_SECONDS}']),
            ContentType=asset.media_type,
            log_extra={
                'collection': asset.item.collection.name,
                'item': asset.item.name,
                'asset': asset.name
            })
        logger.info('S3 Multipart upload successfully created: upload_id=%s',
                    response['UploadId'],
                    extra={
                        's3_response': response,
                        'upload_id': response['UploadId'],
                        'asset': asset.name
                    })
        return response['UploadId']
Example 9
 def test_to_hex_string_invalid_type(self, value):
     """ to_hex_string: raises TypeError for invalid types """
     with pytest.raises(TypeError) as excinfo:
         to_hex_string(value)
     assert 'multihash should be bytes' in str(excinfo.value)
Example 10
 def test_to_hex_string_valid(self, value):
     """ to_hex_string: test if it passes for all valid cases """
     code = value['encoding']['code']
     buffer = encode(bytes.fromhex(value['hex']), code)
     assert to_hex_string(buffer) == hexlify(buffer).decode()
Example 11
import hashlib

import multihash

from topo_processor.file_system.get_fs import get_fs

CHUNK_SIZE = 1024 * 1024  # 1MB


async def multihash_as_hex(path: str) -> str:
    file_hash = hashlib.sha256()
    with get_fs(path).open(path, "rb") as file:
        while chunk := file.read(CHUNK_SIZE):
            file_hash.update(chunk)
    return multihash.to_hex_string(
        multihash.encode(file_hash.digest(), "sha2-256"))
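A usage sketch from the same module; the path below is only a placeholder, and get_fs is assumed to resolve it to a local or remote filesystem as in the surrounding project.

import asyncio

print(asyncio.run(multihash_as_hex('./example.tiff')))  # e.g. '1220...'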
Example 12
 def test_invalid_to_hex_string(self):
     # In actual fact, type checking should catch this for us
     with pytest.raises(TypeError):
         multihash.to_hex_string('hello world')
Example 13
 def test_valid_to_hex_string(self, valid):
     for case in valid:
         code = case['encoding']['code']
         buf = multihash.encode(bytes.fromhex(case['hex']), code)
         assert multihash.to_hex_string(buf) == hexlify(buf).decode()