def test_update_or_create__when_area_exists__retrieves_db_record(self):
    db_area = self.create_upload_area()

    area = UploadArea(uuid=db_area.uuid)
    area.update_or_create()

    self.assertEqual(db_area.id, area.db_id)

def setUp(self):
    super().setUp()
    self.db_session_maker = DBSessionMaker()
    self.db = self.db_session_maker.session()
    self.upload_area_id = str(uuid.uuid4())
    self.upload_area = UploadArea(self.upload_area_id)
    self.upload_area.update_or_create()

def test_ls__returns_info_on_all_files_in_upload_area(self):
    db_area = self.create_upload_area()
    o1 = self.mock_upload_file_to_s3(
        db_area.uuid, 'file1.json',
        content_type='application/json; dcp-type="metadata/foo"')
    o2 = self.mock_upload_file_to_s3(
        db_area.uuid, 'file2.fastq.gz',
        content_type='application/octet-stream; dcp-type=data',
        checksums={'s3_etag': 'a', 'sha1': 'b', 'sha256': 'c', 'crc32c': 'd'})

    area = UploadArea(uuid=db_area.uuid)
    data = area.ls()

    self.assertIn('size', data['files'][0].keys())  # moto file sizes are not accurate
    for fileinfo in data['files']:
        del fileinfo['size']
    self.assertEqual(
        {
            'upload_area_id': db_area.uuid,
            'name': 'file1.json',
            'last_modified': o1.last_modified.isoformat(),
            'content_type': 'application/json; dcp-type="metadata/foo"',
            'url': f"s3://{self.upload_config.bucket_name}/{db_area.uuid}/file1.json",
            'checksums': {'s3_etag': '1', 'sha1': '2', 'sha256': '3', 'crc32c': '4'}
        },
        data['files'][0])
    self.assertEqual(
        {
            'upload_area_id': db_area.uuid,
            'name': 'file2.fastq.gz',
            'last_modified': o2.last_modified.isoformat(),
            'content_type': 'application/octet-stream; dcp-type=data',
            'url': f"s3://{self.upload_config.bucket_name}/{db_area.uuid}/file2.fastq.gz",
            'checksums': {'s3_etag': 'a', 'sha1': 'b', 'sha256': 'c', 'crc32c': 'd'}
        },
        data['files'][1])

def test_with_existing_unlocked_upload_area__returns_creds(self):
    db_area = self.create_upload_area()
    area = UploadArea(db_area.uuid)

    creds = area.credentials()

    keys = sorted(creds.keys())
    self.assertEqual(['AccessKeyId', 'Expiration', 'SecretAccessKey', 'SessionToken'], keys)

def setUp(self):
    super().setUp()
    self.upload_area_id = str(uuid.uuid4())
    self.upload_area = UploadArea(self.upload_area_id)
    self.upload_area.update_or_create()
    self.checksum_id = str(uuid.uuid4())
    self.job_id = str(uuid.uuid4())
    self.s3client = boto3.client('s3')

def test_upload_area_delete_over_timeout(self, mock_retrieve_lambda_timeout):
    area_uuid = self._create_area()
    obj = self.upload_bucket.Object(f'{area_uuid}/test_file')
    obj.put(Body="foo")
    mock_retrieve_lambda_timeout.return_value = 0

    area = UploadArea(area_uuid)
    area.delete()

    record = UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')
    self.assertEqual("DELETION_QUEUED", record["status"])

class ChecksumDaemonTest(UploadTestCaseUsingMockAWS):

    def _make_dbfile(self, upload_area, test_file, checksums=None):
        return DbFile(s3_key=f"{upload_area.uuid}/{test_file.name}",
                      s3_etag=test_file.e_tag,
                      upload_area_id=upload_area.db_id,
                      name=test_file.name,
                      size=test_file.size,
                      checksums=checksums)

    def setUp(self):
        super().setUp()
        # Environment
        self.environment = {
            'INGEST_AMQP_SERVER': 'foo',
            'CSUM_DOCKER_IMAGE': 'bogoimage'
        }
        self.environmentor = EnvironmentSetup(self.environment)
        self.environmentor.enter()
        # Upload area
        self.area_uuid = str(uuid.uuid4())
        self.upload_area = UploadArea(self.area_uuid)
        self.upload_area.update_or_create()
        # Daemon
        context = Mock()
        self.daemon = ChecksumDaemon(context)
        # File
        self.small_file = FixtureFile.factory('foo')
        self.file_key = f"{self.area_uuid}/{self.small_file.name}"
        self.object = self.upload_bucket.Object(self.file_key)
        self.object.put(Key=self.file_key,
                        Body=self.small_file.contents,
                        ContentType=self.small_file.content_type)
        # Event
        self.events = {'Records': [
            {'eventVersion': '2.0',
             'eventSource': 'aws:s3',
             'awsRegion': 'us-east-1',
             'eventTime': '2017-09-15T00:05:10.378Z',
             'eventName': 'ObjectCreated:Put',
             'userIdentity': {'principalId': 'AWS:AROAI4WRRXW2K3Y2IFL6Q:upload-api-dev'},
             'requestParameters': {'sourceIPAddress': '52.91.56.220'},
             'responseElements': {'x-amz-request-id': 'FEBC85CADD1E3A66',
                                  'x-amz-id-2': 'xxx'},
             's3': {'s3SchemaVersion': '1.0',
                    'configurationId': 'NGZjNmM0M2ItZTk0Yi00YTExLWE2NDMtMzYzY2UwN2EyM2Nj',
                    'bucket': {'name': self.upload_config.bucket_name,
                               'ownerIdentity': {'principalId': 'A29PZ5XRQWJUUM'},
                               'arn': f'arn:aws:s3:::{self.upload_config.bucket_name}'},
                    'object': {'key': self.file_key,
                               'size': self.small_file.size,
                               'eTag': self.small_file.e_tag,
                               'sequencer': '0059BB193641C4EAB0'}}}]}
        self.db_session_maker = DBSessionMaker()
        self.db = self.db_session_maker.session()
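
    # setUp above patches the process environment and opens a DB session, but
    # this excerpt shows no matching tearDown. A minimal sketch of one, assuming
    # EnvironmentSetup exposes an exit() that mirrors the enter() used above and
    # that the SQLAlchemy session should be closed per test:
    def tearDown(self):
        super().tearDown()
        self.environmentor.exit()  # restore the original environment variables
        self.db.close()            # release the per-test DB session
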
def test_post_checksum__for_an_obj_without_tags__updates_db_but_does_not_notify_ingest(
        self, mock_fasn):
    checksum_id = str(uuid.uuid4())
    db_area = self.create_upload_area()
    upload_area = UploadArea(db_area.uuid)
    s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json', checksums={})
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                   checksum_id=checksum_id,
                                   job_id='12345',
                                   status="SCHEDULED")
    checksum_event.create_record()

    response = self.client.post(
        f"/v1/area/{upload_area.uuid}/update_checksum/{checksum_id}",
        json={
            "status": "CHECKSUMMED",
            "job_id": checksum_event.job_id,
            "payload": uploaded_file.info()
        })

    self.assertEqual(204, response.status_code)
    db_checksum = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id).one()
    self.assertEqual("CHECKSUMMED", db_checksum.status)
    mock_fasn.assert_not_called()

def test_credentials_with_existing_locked_upload_area(self):
    area_uuid = self._create_area()
    UploadArea(area_uuid).lock()

    response = self.client.post(f"/v1/area/{area_uuid}/credentials")

    self.assertEqual(409, response.status_code)

def test_post_checksum__with_a_checksumming_payload__updates_db_record(self, mock_format_and_send_notification):
    checksum_id = str(uuid.uuid4())
    db_area = self.create_upload_area()
    upload_area = UploadArea(db_area.uuid)
    s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                   checksum_id=checksum_id,
                                   job_id='12345',
                                   status="SCHEDULED")
    checksum_event.create_record()

    response = self.client.post(f"/v1/area/{upload_area.uuid}/update_checksum/{checksum_id}",
                                headers=self.authentication_header,
                                json={
                                    "status": "CHECKSUMMING",
                                    "job_id": checksum_event.job_id,
                                    "payload": uploaded_file.info()
                                })

    self.assertEqual(204, response.status_code)
    db_checksum = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id).one()
    self.assertEqual("CHECKSUMMING", db_checksum.status)
    mock_format_and_send_notification.assert_not_called()

def test_update_event_with_validation_event(self, mock_format_and_send_notification):
    validation_id = str(uuid.uuid4())
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    upload_area = UploadArea(area_id)
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                       validation_id=validation_id,
                                       job_id='12345',
                                       status="SCHEDULED")
    validation_event.create_record()

    validation_event.status = "VALIDATING"
    response = update_event(validation_event, uploaded_file.info(), self.client)

    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("VALIDATING", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual(None, record["validation_ended_at"])
    self.assertEqual(None, record.get("results"))

    validation_event.status = "VALIDATED"
    response = update_event(validation_event, uploaded_file.info(), self.client)

    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("VALIDATED", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_ended_at"))))
    self.assertEqual(uploaded_file.info(), record.get("results"))

def test_update_event_with_checksum_event(self, mock_format_and_send_notification):
    checksum_id = str(uuid.uuid4())
    area_uuid = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_uuid, 'foo.json')
    upload_area = UploadArea(area_uuid)
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                   checksum_id=checksum_id,
                                   job_id='12345',
                                   status="SCHEDULED")
    checksum_event.create_record()

    checksum_event.status = "CHECKSUMMING"
    response = update_event(checksum_event, uploaded_file.info(), self.client)

    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("checksum", checksum_id)
    self.assertEqual("CHECKSUMMING", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("checksum_started_at"))))
    self.assertEqual(None, record["checksum_ended_at"])

    checksum_event.status = "CHECKSUMMED"
    response = update_event(checksum_event, uploaded_file.info(), self.client)

    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("checksum", checksum_id)
    self.assertEqual("CHECKSUMMED", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("checksum_started_at"))))
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("checksum_ended_at"))))

def test_validating_status_file_validation(self, mock_format_and_send_notification):
    validation_id = str(uuid.uuid4())
    orig_val_id = str(uuid.uuid4())
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    upload_area = UploadArea(area_id)
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                       validation_id=validation_id,
                                       job_id='12345',
                                       status="SCHEDULED",
                                       docker_image="test_docker_image",
                                       original_validation_id=orig_val_id)
    validation_event.create_record()
    data = {
        "status": "VALIDATING",
        "job_id": validation_event.job_id,
        "payload": uploaded_file.info()
    }

    response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                headers=self.authentication_header,
                                data=json.dumps(data))

    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("test_docker_image", record["docker_image"])
    self.assertEqual(validation_id, record["id"])
    self.assertEqual(orig_val_id, record["original_validation_id"])
    self.assertEqual("VALIDATING", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual(None, record["validation_ended_at"])
    self.assertEqual(None, record.get("results"))

    response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")
    validation_status = response.get_json()['validation_status']
    self.assertEqual(validation_status, "VALIDATING")
    mock_format_and_send_notification.assert_not_called()

def test_delete__marks_area_deleted_and_deletes_objects(self):
    db_area = self.create_upload_area(db_session=self.db)
    obj = self.upload_bucket.Object(f'{db_area.uuid}/test_file')
    obj.put(Body="foo")

    with patch('upload.common.upload_area.UploadArea._retrieve_upload_area_deletion_lambda_timeout') as mock_retr:
        mock_retr.return_value = 900
        area = UploadArea(uuid=db_area.uuid)
        area.delete()

    self.db.refresh(db_area)
    self.assertEqual("DELETED", db_area.status)
    with self.assertRaises(ClientError):
        obj.load()

def test_unscheduled_status_file_validation(self, mock_format_and_send_notification):
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    upload_area = UploadArea(area_id)
    UploadedFile(upload_area, s3object=s3obj)  # creates file record

    response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")

    validation_status = response.get_json()['validation_status']
    self.assertEqual(validation_status, "UNSCHEDULED")

def test_credentials_with_deleted_upload_area(self, mock_area_deletion_timeout):
    area_uuid = self._create_area()
    mock_area_deletion_timeout.return_value = 900
    UploadArea(area_uuid).delete()

    response = self.client.post(f"/v1/area/{area_uuid}/credentials")

    self.assertEqual(404, response.status_code)

def schedule_file_validation(event, context):
    logger.info(f"initiated schedule_file_validation with {event}")
    unwrapped_event = json.loads(event["Records"][0]["body"])
    upload_area_uuid = unwrapped_event["upload_area_uuid"]
    filenames = unwrapped_event["filenames"]
    validation_id = unwrapped_event["validation_id"]
    image = unwrapped_event["validator_docker_image"]
    env = unwrapped_event["environment"]
    orig_validation_id = unwrapped_event["orig_validation_id"]

    upload_area = UploadArea(upload_area_uuid)
    files = [upload_area.uploaded_file(filename) for filename in filenames]

    validation_scheduler = ValidationScheduler(upload_area_uuid, files)
    validation_id = validation_scheduler.schedule_batch_validation(validation_id, image, env, orig_validation_id)
    logger.info(f"scheduled batch job with {event}")
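
# For reference, the queue message body this handler unwraps must carry the six
# keys read above. The sketch below is reconstructed from those reads only; the
# values shown are illustrative placeholders, not real identifiers.
#
# {
#     "upload_area_uuid": "<upload area uuid>",
#     "filenames": ["foo.json"],
#     "validation_id": "<validation uuid>",
#     "validator_docker_image": "<docker image ref>",
#     "environment": {"SOME_VAR": "value"},
#     "orig_validation_id": "<original validation uuid, if a re-run>"
# }
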
def test_store_file(self):
    db_area = self.create_upload_area()
    area = UploadArea(uuid=db_area.uuid)
    filename = "some.json"
    content_type = 'application/json; dcp-type="metadata/sample"'
    content = "exquisite corpse"

    file = area.store_file(filename, content=content, content_type=content_type)

    s3_key = f"{db_area.uuid}/some.json"
    s3_etag = "18f17fbfdd21cf869d664731e10d4ffd"
    o1 = self.upload_bucket.Object(s3_key)
    o1.load()
    self.assertEqual(
        {
            'upload_area_id': db_area.uuid,
            'name': 'some.json',
            'size': 16,
            'last_modified': o1.last_modified.isoformat(),
            'content_type': 'application/json; dcp-type="metadata/sample"',
            'url': f"s3://{self.upload_config.bucket_name}/{db_area.uuid}/some.json",
            'checksums': {
                "crc32c": "FE9ADA52",
                "s3_etag": s3_etag,
                "sha1": "b1b101e21cf9cf8a4729da44d7818f935eec0ce8",
                "sha256": "29f5572dfbe07e1db9422a4c84e3f9e455aab9ac596f0bf3340be17841f26f70"
            }
        },
        file.info())

    obj = self.upload_bucket.Object(f"{db_area.uuid}/some.json")
    self.assertEqual("exquisite corpse".encode('utf8'), obj.get()['Body'].read())

    db_file = self.db.query(DbFile).filter(DbFile.s3_key == s3_key,
                                           DbFile.s3_etag == s3_etag).one()
    self.assertEqual(16, db_file.size)
    self.assertEqual(db_area.id, db_file.upload_area_id)
    self.assertEqual("some.json", db_file.name)

def test_get_checksum__for_a_file_with_no_checksum_records__returns_status_unscheduled(self, mock_fasn):
    db_area = self.create_upload_area()
    upload_area = UploadArea(db_area.uuid)
    s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
    UploadedFile(upload_area, s3object=s3obj)  # creates file record

    response = self.client.get(f"/v1/area/{upload_area.uuid}/foo.json/checksum")

    checksum_status = response.get_json()['checksum_status']
    self.assertEqual("UNSCHEDULED", checksum_status)

def setUp(self):
    super().setUp()
    self.area_uuid = str(uuid.uuid4())
    self.upload_area = UploadArea(self.area_uuid)
    self.db = UploadDB()
    self.db.create_pg_record("upload_area", {
        "uuid": self.area_uuid,
        "status": "UNLOCKED",
        "bucket_name": self.upload_config.bucket_name
    })

def test_add_uploaded_file_to_csum_daemon_sqs(self):
    area_uuid = self._create_area()

    UploadArea(area_uuid).add_uploaded_file_to_csum_daemon_sqs("filename123")

    message = self.sqs.meta.client.receive_message(QueueUrl='bogo_url')
    message_body = json.loads(message['Messages'][0]['Body'])
    s3_key = message_body['Records'][0]['s3']['object']['key']
    s3_bucket = message_body['Records'][0]['s3']['bucket']['name']
    self.assertEqual(s3_key, f"{area_uuid}/filename123")
    self.assertEqual(s3_bucket, "bogobucket")

def test_uploaded_file(self):
    db_area = self.create_upload_area()
    filename = "somefile.json"
    content = "sdfewrwer"
    self.mock_upload_file_to_s3(db_area.uuid, filename=filename, contents=content)

    file = UploadArea(uuid=db_area.uuid).uploaded_file(filename)

    self.assertIs(UploadedFile, file.__class__)
    self.assertEqual(filename, file.name)

def test_with_existing_locked_upload_area__raises(self):
    db_area = self.create_upload_area()
    area = UploadArea(db_area.uuid)
    area.lock()

    with self.assertRaises(UploadException):
        area.credentials()

def test_update_or_create__when_no_area_exists__creates_db_record(self):
    area_uuid = str(uuid.uuid4())
    with self.assertRaises(NoResultFound):
        self.db.query(DbUploadArea).filter(DbUploadArea.uuid == area_uuid).one()

    UploadArea(uuid=area_uuid).update_or_create()

    record = self.db.query(DbUploadArea).filter(DbUploadArea.uuid == area_uuid).one()
    self.assertEqual(area_uuid, record.uuid)
    self.assertEqual(self.upload_config.bucket_name, record.bucket_name)
    self.assertEqual("UNLOCKED", record.status)

def test_add_upload_area_to_delete_sqs(self):
    area_uuid = self._create_area()

    UploadArea(area_uuid).add_upload_area_to_delete_sqs()

    message = self.sqs.meta.client.receive_message(QueueUrl='delete_sqs_url')
    message_body = json.loads(message['Messages'][0]['Body'])
    self.assertEqual(message_body['area_uuid'], area_uuid)
    record = UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')
    self.assertEqual(record['status'], "DELETION_QUEUED")

class TestUploadAreaLocking(UploadAreaTest):

    def setUp(self):
        super().setUp()
        self.db_area = self.create_upload_area(db_session=self.db)
        self.area = UploadArea(uuid=self.db_area.uuid)

    def test_lock__with_unlocked_area__locks_area(self):
        self.assertEqual("UNLOCKED", self.db_area.status)

        self.area.lock()

        self.db.refresh(self.db_area)
        self.assertEqual("LOCKED", self.db_area.status)

    def test_unlock__with_locked_area__unlocks_area(self):
        self.db_area.status = 'LOCKED'
        self.db.add(self.db_area)
        self.db.commit()

        self.area.unlock()

        self.db.refresh(self.db_area)
        self.assertEqual("UNLOCKED", self.db_area.status)

def test_scheduled_status_file_validation(self, mock_format_and_send_notification):
    validation_id = str(uuid.uuid4())
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    upload_area = UploadArea(area_id)
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                       validation_id=validation_id,
                                       job_id='12345',
                                       status="SCHEDULED")
    validation_event.create_record()

    response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")

    validation_status = response.get_json()['validation_status']
    self.assertEqual(validation_status, "SCHEDULED")

def test_validated_status_file_validation(self, mock_format_and_send_notification):
    validation_id = str(uuid.uuid4())
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    upload_area = UploadArea(area_id)
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                       validation_id=validation_id,
                                       job_id='12345',
                                       status="SCHEDULED",
                                       docker_image="test_docker_image")
    validation_event.create_record()

    data = {
        "status": "VALIDATING",
        "job_id": validation_event.job_id,
        "payload": uploaded_file.info()
    }
    response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                headers=self.authentication_header,
                                data=json.dumps(data))
    data = {
        "status": "VALIDATED",
        "job_id": validation_event.job_id,
        "payload": uploaded_file.info()
    }
    response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                headers=self.authentication_header,
                                data=json.dumps(data))

    self.assertEqual(204, response.status_code)
    mock_format_and_send_notification.assert_called_once_with({
        'upload_area_id': area_id,
        'name': 'foo.json',
        'size': 3,
        'last_modified': s3obj.last_modified.isoformat(),
        'content_type': "application/json",
        'url': f"s3://{self.upload_config.bucket_name}/{area_id}/foo.json",
        'checksums': {'s3_etag': '1', 'sha1': '2', 'sha256': '3', 'crc32c': '4'}
    })
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("VALIDATED", record["status"])
    self.assertEqual("test_docker_image", record["docker_image"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_ended_at"))))
    self.assertEqual(uploaded_file.info(), record.get("results"))

    response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")
    validation_status = response.get_json()['validation_status']
    self.assertEqual(validation_status, "VALIDATED")

def test_checksum_statuses_for_upload_area(self, mock_format_and_send_notification):
    db_area = self.create_upload_area()
    upload_area = UploadArea(db_area.uuid)
    checksum1_id = str(uuid.uuid4())
    checksum2_id = str(uuid.uuid4())
    checksum3_id = str(uuid.uuid4())
    s3obj1 = self.mock_upload_file_to_s3(upload_area.uuid, 'foo1.json')
    s3obj2 = self.mock_upload_file_to_s3(upload_area.uuid, 'foo2.json')
    s3obj3 = self.mock_upload_file_to_s3(upload_area.uuid, 'foo3.json')
    s3obj4 = self.mock_upload_file_to_s3(upload_area.uuid, 'foo4.json')
    s3obj5 = self.mock_upload_file_to_s3(upload_area.uuid, 'foo5.json')
    f1 = UploadedFile(upload_area, s3object=s3obj1)
    f2 = UploadedFile(upload_area, s3object=s3obj2)
    f3 = UploadedFile(upload_area, s3object=s3obj3)
    UploadedFile(upload_area, s3object=s3obj4)  # no checksum event; counts as unscheduled
    UploadedFile(upload_area, s3object=s3obj5)  # no checksum event; counts as unscheduled
    checksum1_event = ChecksumEvent(file_id=f1.db_id,
                                    checksum_id=checksum1_id,
                                    job_id='123',
                                    status="SCHEDULED")
    checksum2_event = ChecksumEvent(file_id=f2.db_id,
                                    checksum_id=checksum2_id,
                                    job_id='456',
                                    status="CHECKSUMMING")
    checksum3_event = ChecksumEvent(file_id=f3.db_id,
                                    checksum_id=checksum3_id,
                                    job_id='789',
                                    status="CHECKSUMMED")
    checksum1_event.create_record()
    checksum2_event.create_record()
    checksum3_event.create_record()

    response = self.client.get(f"/v1/area/{upload_area.uuid}/checksums")

    expected_data = {
        'CHECKSUMMED': 1,
        'CHECKSUMMING': 1,
        'CHECKSUMMING_UNSCHEDULED': 2,
        'SCHEDULED': 1,
        'TOTAL_NUM_FILES': 5
    }
    self.assertEqual(expected_data, response.get_json())

def test_get_checksum__for_a_file_with_checksum_records__returns_the_most_recent_record_status(self, mock_fasn):
    checksum_id = str(uuid.uuid4())
    db_area = self.create_upload_area()
    upload_area = UploadArea(db_area.uuid)
    s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                   checksum_id=checksum_id,
                                   job_id='12345',
                                   status="SCHEDULED")
    checksum_event.create_record()

    response = self.client.get(f"/v1/area/{upload_area.uuid}/{uploaded_file.name}/checksum")

    info = response.get_json()
    self.assertEqual("SCHEDULED", info['checksum_status'])
    self.assertEqual(uploaded_file.checksums, info['checksums'])