def test_update_event_with_validation_event(self, mock_format_and_send_notification):
    """A validation event drives the DB record from SCHEDULED through VALIDATING to VALIDATED."""
    validation_id = str(uuid.uuid4())
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    uploaded_file = UploadedFile(UploadArea(area_id), s3object=s3obj)
    validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                       validation_id=validation_id,
                                       job_id='12345',
                                       status="SCHEDULED")
    validation_event.create_record()

    # First transition: SCHEDULED -> VALIDATING. Start timestamp is set; no end/results yet.
    validation_event.status = "VALIDATING"
    response = update_event(validation_event, uploaded_file.info(), self.client)
    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("VALIDATING", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual(None, record["validation_ended_at"])
    self.assertEqual(None, record.get("results"))

    # Second transition: VALIDATING -> VALIDATED. End timestamp and results are recorded.
    validation_event.status = "VALIDATED"
    response = update_event(validation_event, uploaded_file.info(), self.client)
    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("VALIDATED", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_ended_at"))))
    self.assertEqual(uploaded_file.info(), record.get("results"))
def test_update_event_with_checksum_event(self, mock_format_and_send_notification):
    """A checksum event drives the DB record from SCHEDULED through CHECKSUMMING to CHECKSUMMED."""
    checksum_id = str(uuid.uuid4())
    area_uuid = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_uuid, 'foo.json')
    uploaded_file = UploadedFile(UploadArea(area_uuid), s3object=s3obj)
    checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                   checksum_id=checksum_id,
                                   job_id='12345',
                                   status="SCHEDULED")
    checksum_event.create_record()

    # SCHEDULED -> CHECKSUMMING: start timestamp is set, end timestamp still empty.
    checksum_event.status = "CHECKSUMMING"
    response = update_event(checksum_event, uploaded_file.info(), self.client)
    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("checksum", checksum_id)
    self.assertEqual("CHECKSUMMING", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("checksum_started_at"))))
    self.assertEqual(None, record["checksum_ended_at"])

    # CHECKSUMMING -> CHECKSUMMED: end timestamp is recorded as well.
    checksum_event.status = "CHECKSUMMED"
    response = update_event(checksum_event, uploaded_file.info(), self.client)
    self.assertEqual(204, response.status_code)
    record = UploadDB().get_pg_record("checksum", checksum_id)
    self.assertEqual("CHECKSUMMED", record["status"])
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("checksum_started_at"))))
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("checksum_ended_at"))))
def test_schedule_validation__for_multiple_files__is_successful(self):
    """Scheduling validation for two files queues one validation and links both file rows to it.

    Verifies the PUT /v1/area/{id}/validate endpoint returns 200 with a validation_id,
    marks the validation SCHEDULING_QUEUED, and creates a validation_files join row
    for each requested file.
    """
    area_id = self._create_area()
    self.mock_upload_file_to_s3(area_id, 'foo.json')
    self.mock_upload_file_to_s3(area_id, 'foo2.json')
    payload = {
        'validator_image': "humancellatlas/upload-validator-example",
        'files': ['foo.json', 'foo2.json']
    }

    response = self.client.put(
        f"/v1/area/{area_id}/validate",
        headers=self.authentication_header,
        json=payload
    )

    self.assertEqual(response.status_code, 200)
    validation_id = response.json['validation_id']
    validation_record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual(validation_record['status'], "SCHEDULING_QUEUED")
    # Both uploaded files must be joined to this validation via validation_files.
    validation_files_records = UploadDB().get_pg_records("validation_files", validation_id, column='validation_id')
    self.assertEqual(len(validation_files_records), 2)
    validation_file_db_ids = [record['file_id'] for record in validation_files_records]
    file_one_record = UploadDB().get_pg_record("file", f"{area_id}/foo.json", "s3_key")
    file_two_record = UploadDB().get_pg_record("file", f"{area_id}/foo2.json", "s3_key")
    # assertIn replaces the non-idiomatic assertEqual(<membership>, True); same pass/fail,
    # but failure output now shows the container contents.
    self.assertIn(file_one_record['id'], validation_file_db_ids)
    self.assertIn(file_two_record['id'], validation_file_db_ids)
def test_validating_status_file_validation(self, mock_format_and_send_notification):
    # Purpose: posting a "VALIDATING" status update for a scheduled validation marks the
    # DB record as in progress (started timestamp set, no end timestamp or results),
    # the per-file validate GET endpoint reports VALIDATING, and no ingest notification
    # is sent while the validation is still in flight.
    validation_id = str(uuid.uuid4())
    orig_val_id = str(uuid.uuid4())
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    upload_area = UploadArea(area_id)
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                       validation_id=validation_id,
                                       job_id='12345',
                                       status="SCHEDULED",
                                       docker_image="test_docker_image",
                                       original_validation_id=orig_val_id)
    validation_event.create_record()
    data = {
        "status": "VALIDATING",
        "job_id": validation_event.job_id,
        "payload": uploaded_file.info()
    }
    response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                headers=self.authentication_header,
                                data=json.dumps(data))
    self.assertEqual(204, response.status_code)
    # The validation row retains its scheduling metadata and reflects the new status.
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("test_docker_image", record["docker_image"])
    self.assertEqual(validation_id, record["id"])
    self.assertEqual(orig_val_id, record["original_validation_id"])
    self.assertEqual("VALIDATING", record["status"])
    # Start timestamp set on transition to VALIDATING; the type is checked via its repr.
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual(None, record["validation_ended_at"])
    self.assertEqual(None, record.get("results"))
    # The file's validation-status endpoint should now report VALIDATING.
    response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")
    validation_status = response.get_json()['validation_status']
    self.assertEqual(validation_status, "VALIDATING")
    # Ingest must not be notified before validation completes.
    mock_format_and_send_notification.assert_not_called()
def test_add_to_validation_sqs__adds_correct_event_to_queue(self):
    """The queued SQS message carries every scheduling field and the record is SCHEDULING_QUEUED."""
    uploaded_file = UploadedFile.create(upload_area=self.upload_area,
                                        name="file2",
                                        content_type="application/octet-stream; dcp-type=data",
                                        data="file2_content")
    scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])
    validation_uuid = scheduler.add_to_validation_sqs(
        ["filename123"], "test_docker_image", {"variable": "variable"}, "123456")

    message = self.sqs.meta.client.receive_message(QueueUrl='test_validation_q_url')
    message_body = json.loads(message['Messages'][0]['Body'])
    record = UploadDB().get_pg_record("validation", validation_uuid, column='id')

    # Every field of the queued event must round-trip exactly.
    expected_fields = {
        "filenames": ["filename123"],
        "validation_id": validation_uuid,
        "validator_docker_image": "test_docker_image",
        "environment": {"variable": "variable"},
        "orig_validation_id": "123456",
        "upload_area_uuid": uploaded_file.upload_area.uuid,
    }
    for key, value in expected_fields.items():
        self.assertEqual(message_body[key], value)
    self.assertEqual(record["status"], "SCHEDULING_QUEUED")
def __init__(self):
    # Read required settings from the environment; a missing variable raises
    # KeyError here, at construction time, rather than failing later mid-operation.
    self.api_key = os.environ["INGEST_API_KEY"]
    self.deployment_stage = os.environ["DEPLOYMENT_STAGE"]
    self.api_host = os.environ["API_HOST"]
    # AWS service clients used by this component (default credentials/region chain).
    self.batch_client = boto3.client("batch")
    self.ec2_client = boto3.client('ec2')
    self.lambda_client = boto3.client('lambda')
    # Shared handle to the upload-service database.
    self.db = UploadDB()
def health(): """ This api endpoint is invoked by the dcp wide status monitoring system. This function checks the health of underlying api gateway and db infrastructure. Running a simple query confirms that ecs pgbouncer is up running and talking to rds. """ db_health_check_query = "SELECT count(*) from upload_area;" UploadDB().run_query(db_health_check_query) return requests.codes.ok
def setUp(self):
    """Create a fresh, unlocked upload-area DB record for each test."""
    super().setUp()
    self.area_uuid = str(uuid.uuid4())
    self.upload_area = UploadArea(self.area_uuid)
    self.db = UploadDB()
    area_row = {
        "uuid": self.area_uuid,
        "status": "UNLOCKED",
        "bucket_name": self.upload_config.bucket_name,
    }
    self.db.create_pg_record("upload_area", area_row)
def test_add_upload_area_to_delete_sqs(self):
    """Queueing an area for deletion publishes its uuid to SQS and flags the row DELETION_QUEUED."""
    area_uuid = self._create_area()
    UploadArea(area_uuid).add_upload_area_to_delete_sqs()

    received = self.sqs.meta.client.receive_message(QueueUrl='delete_sqs_url')
    body = json.loads(received['Messages'][0]['Body'])
    self.assertEqual(body['area_uuid'], area_uuid)

    area_record = UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')
    self.assertEqual(area_record['status'], "DELETION_QUEUED")
def test_upload_area_delete_over_timeout(self, mock_retrieve_lambda_timeout):
    """With no lambda time remaining, delete() defers by queueing the area for async deletion."""
    area_uuid = self._create_area()
    self.upload_bucket.Object(f'{area_uuid}/test_file').put(Body="foo")
    # Simulate an exhausted lambda timeout.
    mock_retrieve_lambda_timeout.return_value = 0

    UploadArea(area_uuid).delete()

    record = UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')
    self.assertEqual("DELETION_QUEUED", record["status"])
def test_delete_with_id_of_real_non_empty_upload_area(self):
    """DELETE on a non-empty area is accepted (202) and the area is queued for deletion."""
    area_uuid = self._create_area()
    self.upload_bucket.Object(f'{area_uuid}/test_file').put(Body="foo")

    response = self.client.delete(f"/v1/area/{area_uuid}", headers=self.authentication_header)

    self.assertEqual(202, response.status_code)
    record = UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')
    self.assertEqual("DELETION_QUEUED", record["status"])
def test_locking_of_upload_area(self):
    """POST then DELETE on /lock toggles the area between LOCKED and UNLOCKED."""
    area_uuid = self._create_area()

    def area_status():
        # Current status of the upload_area row, re-read from the DB.
        return UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')["status"]

    self.assertEqual("UNLOCKED", area_status())

    response = self.client.post(f"/v1/area/{area_uuid}/lock", headers=self.authentication_header)
    self.assertEqual(204, response.status_code)
    self.assertEqual("LOCKED", area_status())

    response = self.client.delete(f"/v1/area/{area_uuid}/lock", headers=self.authentication_header)
    self.assertEqual(204, response.status_code)
    self.assertEqual("UNLOCKED", area_status())
def test_validated_status_file_validation(self, mock_format_and_send_notification):
    # Purpose: once a validation is updated to VALIDATED (after passing through
    # VALIDATING), the DB row carries both timestamps and the results payload,
    # ingest is notified exactly once with the file's metadata, and the per-file
    # validate GET endpoint reports VALIDATED.
    validation_id = str(uuid.uuid4())
    area_id = self._create_area()
    s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
    upload_area = UploadArea(area_id)
    uploaded_file = UploadedFile(upload_area, s3object=s3obj)
    validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                       validation_id=validation_id,
                                       job_id='12345',
                                       status="SCHEDULED",
                                       docker_image="test_docker_image")
    validation_event.create_record()
    # First update: mark the validation as in progress.
    data = {
        "status": "VALIDATING",
        "job_id": validation_event.job_id,
        "payload": uploaded_file.info()
    }
    response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                headers=self.authentication_header,
                                data=json.dumps(data))
    # Second update: mark the validation as complete.
    data = {
        "status": "VALIDATED",
        "job_id": validation_event.job_id,
        "payload": uploaded_file.info()
    }
    response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                headers=self.authentication_header,
                                data=json.dumps(data))
    self.assertEqual(204, response.status_code)
    # Ingest is notified once, with the full file-info payload (checksums set up by the mock).
    mock_format_and_send_notification.assert_called_once_with({
        'upload_area_id': area_id,
        'name': 'foo.json',
        'size': 3,
        'last_modified': s3obj.last_modified.isoformat(),
        'content_type': "application/json",
        'url': f"s3://{self.upload_config.bucket_name}/{area_id}/foo.json",
        'checksums': {'s3_etag': '1', 'sha1': '2', 'sha256': '3', 'crc32c': '4'}
    })
    record = UploadDB().get_pg_record("validation", validation_id)
    self.assertEqual("VALIDATED", record["status"])
    self.assertEqual("test_docker_image", record["docker_image"])
    # Both timestamps are populated after completion; types checked via their repr.
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_started_at"))))
    self.assertEqual("<class 'datetime.datetime'>", str(type(record.get("validation_ended_at"))))
    self.assertEqual(uploaded_file.info(), record.get("results"))
    # The file's validation-status endpoint should now report VALIDATED.
    response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")
    validation_status = response.get_json()['validation_status']
    self.assertEqual(validation_status, "VALIDATED")
def test_create_with_unused_upload_area_uuid(self):
    """Creating a brand-new area returns 201 with its S3 URI and stores an UNLOCKED row."""
    area_uuid = str(uuid.uuid4())

    response = self.client.post(f"/v1/area/{area_uuid}", headers=self.authentication_header)

    self.assertEqual(201, response.status_code)
    body = json.loads(response.data)
    self.assertEqual({'uri': f"s3://{self.upload_config.bucket_name}/{area_uuid}/"}, body)

    record = UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')
    self.assertEqual(area_uuid, record["uuid"])
    self.assertEqual(self.upload_config.bucket_name, record["bucket_name"])
    self.assertEqual("UNLOCKED", record["status"])
def test_schedule_validation__with_original_validation_id__retains_original_validation_id(self):
    """A re-validation request carrying original_validation_id keeps it on the new record."""
    area_id = self._create_area()
    for filename in ('foo.json', 'foo2.json'):
        self.mock_upload_file_to_s3(area_id, filename)
    payload = {
        'validator_image': "humancellatlas/upload-validator-example",
        'files': ['foo.json', 'foo2.json'],
        'original_validation_id': '123456'
    }

    response = self.client.put(
        f"/v1/area/{area_id}/validate",
        headers=self.authentication_header,
        json=payload
    )

    self.assertEqual(200, response.status_code)
    validation_record = UploadDB().get_pg_record("validation", response.json['validation_id'])
    self.assertEqual(validation_record['status'], "SCHEDULING_QUEUED")
    self.assertEqual(validation_record['original_validation_id'], "123456")
def test_format_and_send_notification(self, mock_send_notification):
    """A delivered notification is persisted with its payload and linked file id."""
    area_uuid = str(uuid.uuid4())
    upload_area = UploadArea(area_uuid)
    upload_area.update_or_create()
    upload_area._db_load()
    stored_file = upload_area.store_file("test_file_name",
                                         "test_file_content",
                                         "application/json; dcp-type=data")
    notifier = IngestNotifier("file_uploaded", file_id=stored_file.db_id)
    test_payload = {'names': "[test_file_name]", 'upload_area_id': area_uuid}

    notification_id = notifier.format_and_send_notification(test_payload)

    record = UploadDB().get_pg_record("notification", notification_id, column="id")
    self.assertEqual(record['status'], "DELIVERED")
    self.assertEqual(record['file_id'], stored_file.db_id)
    self.assertEqual(record['payload'], test_payload)
def __init__(self):
    # Collects the SQL queries and CloudWatch metric-query specs used by the
    # periodic health-check report posted to the #upload-service Slack channel.
    self.env = os.environ['DEPLOYMENT_STAGE']
    self.db = UploadDB()
    logger.debug(
        f"Running a health check for {self.env}. Results will be posted in #upload-service"
    )
    # Incoming webhook URL for posting the report to Slack.
    self.webhook = UploadConfig().slack_webhook
    # In-flight job counts, limited to jobs created in the last 4 weeks.
    # NOTE(review): the "stale" queries filter updated_at > (now - 2 hours), i.e.
    # jobs touched *recently* -- confirm whether '<' was intended for true staleness.
    self.stale_checksum_job_count_query = "SELECT COUNT(*) FROM checksum " \
                                          "WHERE status='CHECKSUMMING' " \
                                          "AND created_at > CURRENT_DATE - interval '4 weeks' " \
                                          "AND updated_at > CURRENT_TIMESTAMP - interval '2 hours'"
    self.stale_validation_job_count_query = "SELECT COUNT(*) FROM validation " \
                                            "WHERE status='VALIDATING' " \
                                            "AND created_at > CURRENT_DATE - interval '4 weeks' " \
                                            "AND updated_at > CURRENT_TIMESTAMP - interval '2 hours'"
    self.scheduled_checksum_job_count_query = "SELECT COUNT(*) FROM checksum " \
                                              "WHERE status='SCHEDULED' " \
                                              "AND created_at > CURRENT_DATE - interval '4 weeks' " \
                                              "AND updated_at > CURRENT_TIMESTAMP - interval '2 hours'"
    self.scheduled_validation_job_count_query = "SELECT COUNT(*) FROM validation " \
                                                "WHERE status='SCHEDULED' " \
                                                "AND created_at > CURRENT_DATE - interval '4 weeks' " \
                                                "AND updated_at > CURRENT_TIMESTAMP - interval '2 hours'"
    # Areas created in the last 4 weeks that have not yet been deleted.
    self.undeleted_areas_count_query = "SELECT COUNT(*) FROM upload_area " \
                                       "WHERE created_at > CURRENT_DATE - interval '4 weeks' " \
                                       "AND status != 'DELETED'"
    # Failures within the last day.
    self.failed_checksum_count_query = "SELECT COUNT(*) FROM checksum " \
                                       "WHERE status='FAILED' " \
                                       "AND updated_at >= NOW() - '1 day'::INTERVAL"
    self.failed_validation_count_query = "SELECT COUNT(*) FROM validation " \
                                         "WHERE status='FAILED' " \
                                         "AND updated_at >= NOW() - '1 day'::INTERVAL"
    # CloudWatch GetMetricData queries for the pre-checksum dead-letter queue.
    # Period is 90000 seconds (25 h) so one datapoint covers a full daily window.
    self.deadletter_metric_queries = [{
        'Id': 'visible_messages',
        'MetricStat': {
            'Metric': {
                'Namespace': 'AWS/SQS',
                'MetricName': 'ApproximateNumberOfMessagesVisible',
                'Dimensions': [{
                    'Name': 'QueueName',
                    'Value': f'dcp-upload-pre-csum-deadletter-queue-{self.env}'
                }]
            },
            'Period': 90000,
            'Stat': 'Average'
        }
    }, {
        'Id': 'received_messages',
        'MetricStat': {
            'Metric': {
                'Namespace': 'AWS/SQS',
                'MetricName': 'NumberOfMessagesReceived',
                'Dimensions': [{
                    'Name': 'QueueName',
                    'Value': f'dcp-upload-pre-csum-deadletter-queue-{self.env}'
                }]
            },
            'Period': 90000,
            'Stat': 'Average'
        }
    }]
    # CloudWatch error totals for the API and checksum-daemon lambdas.
    self.lambda_error_queries = [{
        'Id': 'upload_api_lambda_errors',
        'MetricStat': {
            'Metric': {
                'Namespace': 'AWS/Lambda',
                'MetricName': 'Errors',
                'Dimensions': [{
                    'Name': 'FunctionName',
                    'Value': f'upload-api-{self.env}'
                }]
            },
            'Period': 90000,
            'Stat': 'Sum'
        }
    }, {
        'Id': 'checksum_daemon_lambda_errors',
        'MetricStat': {
            'Metric': {
                'Namespace': 'AWS/Lambda',
                'MetricName': 'Errors',
                'Dimensions': [{
                    'Name': 'FunctionName',
                    'Value': f'dcp-upload-csum-{self.env}'
                }]
            },
            'Period': 90000,
            'Stat': 'Sum'
        }
    }]