def test_download(dryrun, caplog):
    """Can download BucketItems"""
    items = (
        BucketItem("tests/test_data/somefile.txt"),
        BucketItem("tests/test_data/somefile2.txt"),
        BucketItem("tests/test_data/somefile3.txt"),
    )

    client = MockedClient()
    mocked_bucket = client._session.resource().Bucket()

    # Downloading each file succeeds
    mocked_bucket.download_file.return_value = []

    with caplog.at_level(logging.DEBUG):
        client.download(items, "test_bucket", dryrun=dryrun)

    # Expected calls to download_file
    download_calls = [mock.call(item.key, item.path) for item in items]

    if dryrun:
        # Should've only logged what would've been done
        for msg in ["Would download", "somefile.txt", "somefile2.txt"]:
            assert msg in caplog.text

        mocked_bucket.download_file.assert_not_called()
    else:
        # Should've downloaded
        mocked_bucket.download_file.assert_has_calls(download_calls, any_order=True)
        assert "Download complete" in caplog.text

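# test_download and test_upload both take a ``dryrun`` argument that is not
# defined in this excerpt. A minimal sketch of how it could be supplied -- a
# hypothetical fixture; the project may instead use @pytest.mark.parametrize:
import pytest


@pytest.fixture(params=[True, False], ids=["dryrun", "live"])
def dryrun(request):
    # Each test requesting this fixture runs once in dry-run mode and once "live"
    return request.param
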
def test_upload_exceptions(caplog):
    """Exceptions raised from upload are expressed in error logging"""
    items = [
        BucketItem("tests/test_data/somefile3.txt"),
        BucketItem("tests/test_data/somefile2.txt"),
        BucketItem("tests/test_data/somefile.txt"),
    ]

    client = MockedClient()

    # Uploading fails twice before succeeding
    client._session.resource().Bucket().upload_file.side_effect = [
        S3UploadFailedError("Error uploading somefile3.txt"),
        S3UploadFailedError("Error uploading somefile2.txt"),
        mock.DEFAULT,
    ]

    with caplog.at_level(logging.DEBUG):
        client.upload(items, "test_bucket")

    for msg in [
        "One or more exceptions occurred during upload",
        "Error uploading somefile3.txt",
        "Error uploading somefile2.txt",
    ]:
        assert msg in caplog.text

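# MockedClient is a test helper not shown in this excerpt. A minimal sketch of
# the idea, assuming only what the tests above rely on: a Client whose boto3
# session attribute (``_session``) is replaced by a MagicMock, so that
# resource().Bucket()/resource().Table() never touch AWS:
from unittest import mock

from chexus import Client


class MockedClient(Client):
    def __init__(self, *args, **kwargs):
        # Assumes chexus.Client can be constructed with no required arguments
        super().__init__(*args, **kwargs)
        # Swap the real boto3 session for a mock; every attribute access on it
        # (resource(), Bucket(), Table(), ...) returns another MagicMock.
        self._session = mock.MagicMock()
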
def test_upload_duplicate(caplog):
    """Doesn't attempt to replace file objects"""
    item = BucketItem("tests/test_data/somefile.txt")

    client = MockedClient()
    mocked_bucket = client._session.resource().Bucket()

    # Searching for the file returns an iterable of matches
    mocked_bucket.objects.filter.return_value = [
        {"ObjectSummary_obj": {"key": item.key, "bucket": "mocked_bucket"}}
    ]

    with caplog.at_level(logging.DEBUG):
        client.upload(item, "mocked_bucket")

    # Should've checked bucket for duplicate file...
    mocked_bucket.objects.filter.assert_called_with(Prefix=item.key)
    # ...and found one
    assert "Item already in s3 bucket" in caplog.text

    # Should not have tried to upload
    mocked_bucket.upload_file.assert_not_called()

def test_publish_invalid_item(caplog):
    """Doesn't attempt to publish invalid items"""
    # Bunch of invalid items
    items = (
        {"Item": "Invalid"},
        "Not going to happen",
        BucketItem("tests/test_data/somefile.txt"),
        [2, 4, 6, 8],
    )

    client = MockedClient()

    with caplog.at_level(logging.DEBUG):
        client.publish(items=items, table_name="test_table")

    for msg in [
        "Expected type 'TableItem'",
        "dict",
        "str",
        "BucketItem",
        "list",
    ]:
        assert msg in caplog.text

    client._session.resource().Table().put_item.assert_not_called()

def test_bucket_item():
    # Create BucketItem
    item = BucketItem(file_path="tests/test_data/somefile.txt")

    # Should have derived name from path
    assert item.path == "tests/test_data/somefile.txt"
    assert item.name == "somefile.txt"

    # Should have computed, assigned checksum
    assert (
        item.checksum
        == "ee21ae5cd21ff1bb2263f7c98a8557d42646ed1ec660d9c1f7c3f4e781bc6710"
    )

    # Should have assigned name to key
    assert item.key == item.name

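# The checksum asserted above is 64 hex characters, consistent with a SHA-256
# digest of the file contents. A sketch of how the expected value could be
# reproduced independently -- an assumption about the hashing scheme, not a
# statement about chexus internals:
import hashlib


def sha256_of(path):
    # Hash the raw bytes of the file and return the hex digest
    with open(path, "rb") as f:
        return hashlib.sha256(f.read()).hexdigest()


# e.g. sha256_of("tests/test_data/somefile.txt") should equal item.checksum
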
def test_upload(dryrun, caplog):
    """Can upload BucketItems"""
    items = (
        BucketItem("tests/test_data/somefile.txt"),
        BucketItem("tests/test_data/somefile2.txt"),
        BucketItem("tests/test_data/somefile3.txt"),
    )

    client = MockedClient()
    mocked_bucket = client._session.resource().Bucket()

    # Searching for the files returns no matches
    mocked_bucket.objects.filter.return_value = []

    with caplog.at_level(logging.DEBUG):
        client.upload(items, "test_bucket", dryrun=dryrun)

    # Expected calls to the Bucket methods objects.filter and upload_file
    objects_calls = [mock.call(Prefix=item.key) for item in items]
    upload_calls = [mock.call(item.path, item.key) for item in items]

    if dryrun:
        # Should've only logged what would've been done
        for msg in ["Would upload", "somefile.txt", "somefile2.txt"]:
            assert msg in caplog.text

        mocked_bucket.upload_file.assert_not_called()
    else:
        # Should've checked bucket for duplicate files...
        mocked_bucket.objects.filter.assert_has_calls(objects_calls, any_order=True)
        # ...and proceeded with the upload
        assert "Content already present in s3 bucket" not in caplog.text

        # Should've uploaded
        mocked_bucket.upload_file.assert_has_calls(upload_calls, any_order=True)
        assert "Upload complete" in caplog.text

def test_download_exceptions(caplog):
    """Exceptions raised from download are expressed in error logging"""
    item = BucketItem("tests/test_data/somefile3.txt")

    client = MockedClient()

    # The file is not present in the S3 bucket
    client._session.resource().Bucket().download_file.side_effect = [
        ClientError({"Error": {"Code": "404"}}, "download")
    ]

    with caplog.at_level(logging.DEBUG):
        client.download(item, "test_bucket")

    for msg in [
        "One or more exceptions occurred during download",
        "An error occurred (404) when calling the download operation: Unknown",
    ]:
        assert msg in caplog.text

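# The path/content_type pair for the next test is presumably supplied by
# parametrization. The decorator below is a hypothetical sketch: the value
# "text/plain" mirrors the standard mimetypes mapping for a .txt file and is
# illustrative, not necessarily the project's own test data.
import pytest


@pytest.mark.parametrize(
    "path,content_type",
    [("tests/test_data/somefile.txt", "text/plain")],
)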
def test_bucket_item_get_content_type(path, content_type):
    item = BucketItem(file_path=path)
    assert item.content_type == content_type

def test_bucket_item_bad_path():
    # Create BucketItem
    item = BucketItem(file_path="bad/path/to/nowhere")

    # Should not have set a checksum
    assert item.checksum is None

def main():
    LOG.setLevel(logging.INFO)
    logging.basicConfig(format="%(message)s", level=logging.INFO)

    parser = argparse.ArgumentParser(
        description="Download a file from an S3 bucket."
    )
    parser.add_argument(
        "--object-key",
        required=True,
        help="The file object's key in the S3 bucket.",
    )
    parser.add_argument(
        "--file-path",
        required=True,
        help="Local filesystem path at which to save the file.",
    )
    parser.add_argument(
        "--bucket",
        required=True,
        help="S3 bucket from which to download the file.",
    )
    parser.add_argument(
        "--aws-access-id",
        default=None,
        help="Access ID for Amazon services. If no ID is provided, an attempt"
        " to find it among environment variables and the ~/.aws/config file"
        " will be made.",
    )
    parser.add_argument(
        "--aws-access-key",
        default=None,
        help="Access key for Amazon services. If no key is provided, an attempt"
        " to find it among environment variables and the ~/.aws/config file"
        " will be made.",
    )
    parser.add_argument(
        "--aws-session-token",
        default=None,
        help="Session token for Amazon services. If no token is provided, an"
        " attempt to find it among environment variables and the"
        " ~/.aws/config file will be made.",
    )
    parser.add_argument(
        "--dryrun",
        action="store_true",
        help="Don't execute the action, only log what would otherwise be done.",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Include debug logging."
    )

    p = parser.parse_args()

    if p.debug:
        logging.getLogger("chexus").setLevel(logging.DEBUG)
        LOG.setLevel(logging.DEBUG)

    client = Client(
        access_id=p.aws_access_id,
        access_key=p.aws_access_key,
        session_token=p.aws_session_token,
    )

    download_item = BucketItem(file_path=p.file_path, key=p.object_key)

    # A checksum means the local file already exists; confirm before overwriting
    if download_item.checksum:
        resp = input("File already exists. Do you want to overwrite it? [y/N]: ")
        if str(resp).lower() not in ("yes", "y"):
            LOG.info("Aborting...")
            return

    client.download(items=download_item, bucket_name=p.bucket, dryrun=p.dryrun)

    downloaded_item = BucketItem(file_path=p.file_path, key=p.object_key)
    assert downloaded_item.checksum
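

# main() relies on module-level pieces not shown in this excerpt. A sketch of
# the assumed scaffolding -- the logger name and script filename are guesses,
# while Client/BucketItem come from the chexus package referenced above:
import argparse
import logging

from chexus import BucketItem, Client

LOG = logging.getLogger(__name__)


if __name__ == "__main__":
    # Example invocation (hypothetical bucket and key, script saved as
    # e.g. download_file.py):
    #   python download_file.py --bucket my-bucket --object-key somefile.txt \
    #       --file-path ./somefile.txt --dryrun
    main()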