def calculate_checksum(path):
    """
    Compute a checksum for ``path``, dispatching on what the path points to.

    :param path: filesystem location to checksum
    :return: result of ``file_checksum(path)`` when ``path`` is an existing
        regular file, otherwise the result of ``dir_checksum(path)``
    """
    # Path.is_file() is False for anything that is not an existing regular
    # file, so every other case is delegated to dir_checksum.
    if Path(path).is_file():
        return file_checksum(path)
    return dir_checksum(path)
Example #2
0
 def test_file_checksum(self):
     """
     ``file_checksum`` must return the expected hex digest for known content.

     The fixture is written to a named temporary file, closed so that all
     bytes are on disk, checksummed by name, and removed afterwards.
     """
     import os  # local import: only needed for temp-file cleanup

     # delete=False keeps the file on disk after the handle is closed so it
     # can be reopened by name; the finally block below removes it.
     with tempfile.NamedTemporaryFile(mode="wb", delete=False) as f:
         f.write(b"Hello World!")
         temp_path = f.name

     try:
         # Expected value is the MD5 hex digest of b"Hello World!".
         self.assertEqual("ed076287532e86365e841e92bfc50d8c",
                          file_checksum(temp_path))
     finally:
         # Always clean up, even when the assertion fails.
         os.remove(temp_path)
Example #3
0
 def test_s3_upload_with_dedup(self):
     """
     ``upload_with_dedup`` must return an S3 URL keyed by the file checksum.

     The expected URL is ``s3://<bucket>/<prefix>/<checksum>.zip`` for an
     empty temporary file uploaded with the ``"zip"`` extension. The
     temporary file is removed once the assertion has run.
     """
     import os  # local import: only needed for temp-file cleanup

     s3_uploader = S3Uploader(
         s3_client=self.s3,
         bucket_name=self.bucket_name,
         prefix=self.prefix,
         kms_key_id=self.kms_key_id,
         force_upload=self.force_upload,
     )
     # head_object is stubbed to raise ClientError; presumably the uploader
     # treats that as "object not in S3 yet" -- confirm against S3Uploader.
     self.s3.head_object = MagicMock(side_effect=ClientError(
         error_response={}, operation_name="head_object"))

     # delete=False keeps the (empty) file on disk after the handle is
     # closed so the uploader can open it by name; removed in finally.
     with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
         temp_path = f.name

     try:
         s3_url = s3_uploader.upload_with_dedup(temp_path, "zip")
         self.assertEqual(
             s3_url, "s3://{0}/{1}/{2}.zip".format(self.bucket_name,
                                                   self.prefix,
                                                   file_checksum(temp_path)))
     finally:
         # Always clean up, even when the upload or assertion fails.
         os.remove(temp_path)
Example #4
0
    def upload_with_dedup(self, file_name, extension=None, precomputed_md5=None):
        """
        Upload a file under an S3 object name derived from its MD5 sum.

        :param file_name: file to upload
        :param extension: optional file extension; when truthy it is appended
            to the object name after a "."
        :param precomputed_md5: optional MD5 hash to use for the object name;
            when falsy the hash is computed with ``file_checksum(file_name)``
        :return: whatever ``self.upload`` returns for the file and the
            derived object name (the S3 URL of the uploaded object)
        """

        # Naming the object after the content hash is what prevents duplicate
        # uploads: the uploader checks whether the object already exists in
        # S3 and re-uploads only if necessary, so a template that points to
        # the same file in several places uploads it only once.
        digest = precomputed_md5 if precomputed_md5 else file_checksum(file_name)
        object_name = digest + "." + extension if extension else digest
        return self.upload(file_name, object_name)