from unittest.mock import call

from app.aws.s3 import get_s3_bucket_objects  # import path assumed from the patch target below


def test_get_s3_bucket_objects_make_correct_pagination_call(notify_api, mocker):
    paginator_mock = mocker.patch("app.aws.s3.client")

    get_s3_bucket_objects("foo-bucket", subfolder="bar")

    # The subfolder argument must be forwarded to S3 as the key prefix.
    paginator_mock.assert_has_calls([
        call().get_paginator().paginate(Bucket="foo-bucket", Prefix="bar")
    ])

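# The test above exercises get_s3_bucket_objects. A minimal sketch of the
# function under test, assuming a plain boto3 client; the production module
# wraps client creation (the tests patch "app.aws.s3.client"), so the exact
# client wiring here is an assumption, not the real implementation.
import boto3


def get_s3_bucket_objects(bucket_name, subfolder=''):
    s3_client = boto3.client('s3')
    # list_objects pages are capped at 1,000 keys, so paginate to collect
    # every object under the prefix.
    paginator = s3_client.get_paginator('list_objects_v2')
    all_objects = []
    for page in paginator.paginate(Bucket=bucket_name, Prefix=subfolder):
        all_objects.extend(page.get('Contents', []))
    return all_objects
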
def collate_letter_pdfs_for_day(date=None):
    if not date:
        # Using the truncated date is OK because UTC to BST does not make a
        # difference to the date, since the task is triggered mid afternoon.
        date = datetime.utcnow().strftime("%Y-%m-%d")

    letter_pdfs = sorted(
        s3.get_s3_bucket_objects(current_app.config['LETTERS_PDF_BUCKET_NAME'], subfolder=date),
        key=lambda letter: letter['Key']
    )
    for i, letters in enumerate(group_letters(letter_pdfs)):
        filenames = [letter['Key'] for letter in letters]

        # Short, URL-safe digest keeps zip names unique but readable
        # ("hash" renamed to avoid shadowing the builtin).
        letter_hash = urlsafe_b64encode(sha512(''.join(filenames).encode()).digest())[:20].decode()
        # eg NOTIFY.2018-12-31.001.Wjrui5nAvObjPd-3GEL-.ZIP
        dvla_filename = 'NOTIFY.{date}.{num:03}.{hash}.ZIP'.format(
            date=date, num=i + 1, hash=letter_hash)

        current_app.logger.info(
            'Calling task zip-and-send-letter-pdfs for {} pdfs to upload {} with total size {:,} bytes'.format(
                len(filenames), dvla_filename, sum(letter['Size'] for letter in letters))
        )
        notify_celery.send_task(
            name=TaskNames.ZIP_AND_SEND_LETTER_PDFS,
            kwargs={'filenames_to_zip': filenames, 'upload_filename': dvla_filename},
            queue=QueueNames.PROCESS_FTP,
            compression='zlib'
        )

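# collate_letter_pdfs_for_day relies on group_letters to split the day's PDFs
# into zip-sized batches. A hypothetical sketch, assuming batches are capped
# by total byte size and file count; the names and limits below are
# illustrative, not the production values.
MAX_ZIP_FILESIZE = 2 * 1024 * 1024 * 1024  # assumed cap: 2 GiB per zip
MAX_FILES_PER_ZIP = 500                    # assumed cap: files per zip


def group_letters(letter_pdfs):
    group = []
    group_size = 0
    for letter in letter_pdfs:
        # Start a new batch once adding this letter would breach either cap.
        if group and (
            group_size + letter['Size'] > MAX_ZIP_FILESIZE
            or len(group) >= MAX_FILES_PER_ZIP
        ):
            yield group
            group, group_size = [], 0
        group.append(letter)
        group_size += letter['Size']
    if group:
        yield group
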
def _delete_letters_from_s3(notification_type, service_id, date_to_delete_from, query_limit):
    bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME']
    letters_to_delete_from_s3 = db.session.query(Notification).filter(
        Notification.notification_type == notification_type,
        Notification.created_at < date_to_delete_from,
        Notification.service_id == service_id,
        # Although letters in non-completed statuses do have PDFs in S3, they do
        # not exist in the production-letters-pdf bucket as they never made it
        # that far, so we do not try to delete them from it.
        Notification.status.in_(NOTIFICATION_STATUS_TYPES_COMPLETED)
    ).limit(query_limit).all()

    for letter in letters_to_delete_from_s3:
        prefix = get_letter_pdf_filename(
            reference=letter.reference,
            crown=letter.service.crown,
            created_at=letter.created_at,
            ignore_folder=letter.key_type == KEY_TYPE_TEST,
            postage=letter.postage
        )
        s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix)
        for s3_object in s3_objects:
            try:
                remove_s3_object(bucket_name, s3_object['Key'])
            except ClientError:
                current_app.logger.exception(
                    "Could not delete S3 object with filename: {}".format(s3_object['Key']))

def _delete_letters_from_s3(notification_type, service_id, date_to_delete_from, query_limit):
    letters_to_delete_from_s3 = db.session.query(Notification).filter(
        Notification.notification_type == notification_type,
        Notification.created_at < date_to_delete_from,
        Notification.service_id == service_id
    ).limit(query_limit).all()

    bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME']
    for letter in letters_to_delete_from_s3:
        if letter.sent_at:
            sent_at = str(letter.sent_at.date())
            # Build the S3 key prefix for this letter: the empty date field and
            # the [:-5] slice trim the filename template down to a prefix that
            # matches every object for this reference.
            prefix = LETTERS_PDF_FILE_LOCATION_STRUCTURE.format(
                folder=sent_at + "/",
                reference=letter.reference,
                duplex="D",
                letter_class="2",
                colour="C",
                crown="C" if letter.service.crown else "N",
                date=''
            ).upper()[:-5]
            s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix)
            for s3_object in s3_objects:
                try:
                    remove_s3_object(bucket_name, s3_object['Key'])
                except ClientError:
                    current_app.logger.exception(
                        "Could not delete S3 object with filename: {}".format(s3_object['Key']))

def _delete_letters_from_s3(notification_type, service_id, date_to_delete_from, query_limit):
    letters_to_delete_from_s3 = db.session.query(Notification).filter(
        Notification.notification_type == notification_type,
        Notification.created_at < date_to_delete_from,
        Notification.service_id == service_id
    ).limit(query_limit).all()

    bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME']
    for letter in letters_to_delete_from_s3:
        # TODO: this check is probably no longer needed; the query should be
        # updated to fetch letters sent seven days ago instead.
        if letter.sent_at:
            prefix = get_letter_pdf_filename(
                reference=letter.reference,
                crown=letter.service.crown,
                created_at=letter.created_at,
                ignore_folder=letter.key_type == KEY_TYPE_TEST,
                postage=letter.postage
            )
            s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix)
            for s3_object in s3_objects:
                try:
                    remove_s3_object(bucket_name, s3_object['Key'])
                except ClientError:
                    current_app.logger.exception(
                        "Could not delete S3 object with filename: {}".format(s3_object['Key']))

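# The two versions above delegate prefix building to get_letter_pdf_filename.
# A sketch assuming it wraps the same LETTERS_PDF_FILE_LOCATION_STRUCTURE
# template that the middle version inlines; the signature, template layout,
# and field values here are assumptions for illustration only.
LETTERS_PDF_FILE_LOCATION_STRUCTURE = \
    '{folder}NOTIFY.{reference}.{duplex}.{letter_class}.{colour}.{crown}.{date}.pdf'


def get_letter_pdf_filename(reference, crown, created_at, ignore_folder=False, postage='second'):
    # Test-key letters are assumed to sit at the bucket root rather than in a
    # dated folder.
    folder = '' if ignore_folder else '{}/'.format(created_at.date())
    return LETTERS_PDF_FILE_LOCATION_STRUCTURE.format(
        folder=folder,
        reference=reference,
        duplex='D',
        letter_class='2' if postage == 'second' else '1',
        colour='C',
        crown='C' if crown else 'N',
        date=created_at.strftime('%Y%m%d%H%M%S'),
    ).upper()
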
def collate_letter_pdfs_for_day(date):
    letter_pdfs = s3.get_s3_bucket_objects(
        current_app.config['LETTERS_PDF_BUCKET_NAME'], subfolder=date)
    for letters in group_letters(letter_pdfs):
        filenames = [letter['Key'] for letter in letters]
        current_app.logger.info(
            'Calling task zip-and-send-letter-pdfs for {} pdfs of total size {:,} bytes'.format(
                len(filenames), sum(letter['Size'] for letter in letters))
        )
        notify_celery.send_task(
            name=TaskNames.ZIP_AND_SEND_LETTER_PDFS,
            kwargs={'filenames_to_zip': filenames},
            queue=QueueNames.PROCESS_FTP,
            compression='zlib'
        )

def delete_dvla_response_files_older_than_seven_days():
    try:
        start = datetime.utcnow()
        bucket_objects = s3.get_s3_bucket_objects(
            current_app.config['DVLA_RESPONSE_BUCKET_NAME'], 'root/dispatch')
        older_than_seven_days = s3.filter_s3_bucket_objects_within_date_range(bucket_objects)

        for f in older_than_seven_days:
            s3.remove_s3_object(current_app.config['DVLA_RESPONSE_BUCKET_NAME'], f['Key'])

        current_app.logger.info(
            "Delete dvla response files started {} finished {} deleted {} files".format(
                start, datetime.utcnow(), len(older_than_seven_days))
        )
    except ClientError:
        # The body only performs S3 calls, so catch the boto ClientError rather
        # than SQLAlchemyError, which this function can never raise.
        current_app.logger.exception("Failed to delete dvla response files")
        raise

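# delete_dvla_response_files_older_than_seven_days leans on the date-range
# filter. A minimal sketch, assuming it keeps objects whose LastModified falls
# in a window ending seven days ago; the window width and defaults are
# assumptions.
from datetime import datetime, timedelta


def filter_s3_bucket_objects_within_date_range(bucket_objects, older_than=7, limit_days=2):
    end_date = datetime.utcnow() - timedelta(days=older_than)
    start_date = end_date - timedelta(days=limit_days)
    return [
        obj for obj in bucket_objects
        # boto3 returns timezone-aware datetimes; drop tzinfo before comparing
        if start_date < obj['LastModified'].replace(tzinfo=None) <= end_date
    ]
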
def test_get_s3_bucket_objects_builds_objects_list_from_paginator(notify_api, mocker):
    AFTER_SEVEN_DAYS = datetime_in_past(days=8)
    paginator_mock = mocker.patch('app.aws.s3.client')
    # Two pages from the paginator, one object per page; the function should
    # flatten them into a single list.
    multiple_pages_s3_object = [
        {
            "Contents": [
                single_s3_object_stub('bar/foo.txt', AFTER_SEVEN_DAYS),
            ]
        },
        {
            "Contents": [
                single_s3_object_stub('bar/foo1.txt', AFTER_SEVEN_DAYS),
            ]
        },
    ]
    paginator_mock.return_value.get_paginator.return_value.paginate.return_value = multiple_pages_s3_object

    bucket_objects = get_s3_bucket_objects('foo-bucket', subfolder='bar')

    assert len(bucket_objects) == 2
    assert set(bucket_objects[0].keys()) == {'ETag', 'Key', 'LastModified'}

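# The paginator test above uses two small helpers. Hypothetical sketches,
# assuming the stub mirrors the per-object keys boto3 returns (the test
# asserts exactly ETag, Key, and LastModified).
from datetime import datetime, timedelta


def datetime_in_past(days=0):
    return datetime.utcnow() - timedelta(days=days)


def single_s3_object_stub(key='foo.txt', last_modified=None):
    return {
        'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',  # placeholder MD5-style ETag
        'Key': key,
        'LastModified': last_modified or datetime.utcnow(),
    }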