コード例 #1
0
    def test_update_event_with_validation_event(self, mock_format_and_send_notification):
        """update_event on a ValidationEvent advances the validation DB record
        through VALIDATING -> VALIDATED, stamping start/end timestamps and
        storing the file-info payload as results only on completion."""
        from datetime import datetime

        validation_id = str(uuid.uuid4())
        area_id = self._create_area()
        s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
        upload_area = UploadArea(area_id)
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)
        validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                           validation_id=validation_id,
                                           job_id='12345',
                                           status="SCHEDULED")
        validation_event.create_record()
        validation_event.status = "VALIDATING"
        response = update_event(validation_event, uploaded_file.info(), self.client)
        self.assertEqual(204, response.status_code)
        record = UploadDB().get_pg_record("validation", validation_id)
        self.assertEqual("VALIDATING", record["status"])
        # Start timestamp is set; end timestamp and results only on completion.
        self.assertIsInstance(record.get("validation_started_at"), datetime)
        self.assertIsNone(record["validation_ended_at"])
        self.assertIsNone(record.get("results"))

        validation_event.status = "VALIDATED"
        response = update_event(validation_event, uploaded_file.info(), self.client)
        self.assertEqual(204, response.status_code)
        record = UploadDB().get_pg_record("validation", validation_id)
        self.assertEqual("VALIDATED", record["status"])
        self.assertIsInstance(record.get("validation_started_at"), datetime)
        self.assertIsInstance(record.get("validation_ended_at"), datetime)
        self.assertEqual(uploaded_file.info(), record.get("results"))
コード例 #2
0
    def test_update_event_with_checksum_event(self, mock_format_and_send_notification):
        """update_event on a ChecksumEvent advances the checksum DB record
        through CHECKSUMMING -> CHECKSUMMED, stamping start/end timestamps."""
        from datetime import datetime

        checksum_id = str(uuid.uuid4())
        area_uuid = self._create_area()
        s3obj = self.mock_upload_file_to_s3(area_uuid, 'foo.json')
        upload_area = UploadArea(area_uuid)
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)
        checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                       checksum_id=checksum_id,
                                       job_id='12345',
                                       status="SCHEDULED")
        checksum_event.create_record()

        checksum_event.status = "CHECKSUMMING"
        response = update_event(checksum_event, uploaded_file.info(), self.client)
        self.assertEqual(204, response.status_code)
        record = UploadDB().get_pg_record("checksum", checksum_id)
        self.assertEqual("CHECKSUMMING", record["status"])
        # Start timestamp is set; end timestamp only appears on completion.
        self.assertIsInstance(record.get("checksum_started_at"), datetime)
        self.assertIsNone(record["checksum_ended_at"])

        checksum_event.status = "CHECKSUMMED"
        response = update_event(checksum_event, uploaded_file.info(), self.client)
        self.assertEqual(204, response.status_code)
        record = UploadDB().get_pg_record("checksum", checksum_id)
        self.assertEqual("CHECKSUMMED", record["status"])
        self.assertIsInstance(record.get("checksum_started_at"), datetime)
        self.assertIsInstance(record.get("checksum_ended_at"), datetime)
コード例 #3
0
    def test_schedule_validation__for_multiple_files__is_successful(self):
        """Scheduling validation of two files returns 200, queues the
        validation, and links both file rows to it via validation_files."""
        area_id = self._create_area()
        self.mock_upload_file_to_s3(area_id, 'foo.json')
        self.mock_upload_file_to_s3(area_id, 'foo2.json')

        payload = {
            'validator_image': "humancellatlas/upload-validator-example",
            'files': ['foo.json', 'foo2.json']
        }
        response = self.client.put(
            f"/v1/area/{area_id}/validate",
            headers=self.authentication_header,
            json=payload
        )

        self.assertEqual(response.status_code, 200)
        validation_id = response.json['validation_id']
        validation_record = UploadDB().get_pg_record("validation", validation_id)
        self.assertEqual(validation_record['status'], "SCHEDULING_QUEUED")
        validation_files_records = UploadDB().get_pg_records("validation_files", validation_id, column='validation_id')
        file_one_record = UploadDB().get_pg_record("file", f"{area_id}/foo.json", "s3_key")
        file_two_record = UploadDB().get_pg_record("file", f"{area_id}/foo2.json", "s3_key")
        self.assertEqual(len(validation_files_records), 2)
        validation_file_db_ids = [record['file_id'] for record in validation_files_records]
        # assertIn gives a clearer failure message than assertEqual(x in y, True).
        self.assertIn(file_one_record['id'], validation_file_db_ids)
        self.assertIn(file_two_record['id'], validation_file_db_ids)
コード例 #4
0
 def test_validating_status_file_validation(self, mock_format_and_send_notification):
     """Posting a VALIDATING update sets the start timestamp, leaves the end
     timestamp and results unset, exposes the status through the GET
     endpoint, and does not notify ingest."""
     from datetime import datetime

     validation_id = str(uuid.uuid4())
     orig_val_id = str(uuid.uuid4())
     area_id = self._create_area()
     s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
     upload_area = UploadArea(area_id)
     uploaded_file = UploadedFile(upload_area, s3object=s3obj)
     validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                        validation_id=validation_id,
                                        job_id='12345',
                                        status="SCHEDULED",
                                        docker_image="test_docker_image",
                                        original_validation_id=orig_val_id)
     validation_event.create_record()
     data = {
         "status": "VALIDATING",
         "job_id": validation_event.job_id,
         "payload": uploaded_file.info()
     }
     response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                 headers=self.authentication_header,
                                 data=json.dumps(data))
     self.assertEqual(204, response.status_code)
     record = UploadDB().get_pg_record("validation", validation_id)
     self.assertEqual("test_docker_image", record["docker_image"])
     self.assertEqual(validation_id, record["id"])
     self.assertEqual(orig_val_id, record["original_validation_id"])
     self.assertEqual("VALIDATING", record["status"])
     # Start timestamp set; run is still in flight, so no end time or results.
     self.assertIsInstance(record.get("validation_started_at"), datetime)
     self.assertIsNone(record["validation_ended_at"])
     self.assertIsNone(record.get("results"))
     response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")
     validation_status = response.get_json()['validation_status']
     self.assertEqual(validation_status, "VALIDATING")
     mock_format_and_send_notification.assert_not_called()
コード例 #5
0
    def test_add_to_validation_sqs__adds_correct_event_to_queue(self):
        """The SQS message published by the scheduler mirrors the arguments
        it was given, and the validation row ends up SCHEDULING_QUEUED."""
        uploaded_file = UploadedFile.create(
            upload_area=self.upload_area,
            name="file2",
            content_type="application/octet-stream; dcp-type=data",
            data="file2_content")
        scheduler = ValidationScheduler(self.upload_area_id, [uploaded_file])

        validation_uuid = scheduler.add_to_validation_sqs(
            ["filename123"], "test_docker_image", {"variable": "variable"}, "123456")

        raw_message = self.sqs.meta.client.receive_message(
            QueueUrl='test_validation_q_url')
        body = json.loads(raw_message['Messages'][0]['Body'])
        self.assertEqual(body["filenames"], ["filename123"])
        self.assertEqual(body["validation_id"], validation_uuid)
        self.assertEqual(body["validator_docker_image"], "test_docker_image")
        self.assertEqual(body["environment"], {"variable": "variable"})
        self.assertEqual(body["orig_validation_id"], "123456")
        self.assertEqual(body["upload_area_uuid"], uploaded_file.upload_area.uuid)
        validation_row = UploadDB().get_pg_record("validation", validation_uuid, column='id')
        self.assertEqual(validation_row["status"], "SCHEDULING_QUEUED")
コード例 #6
0
 def __init__(self):
     """Read required settings from the environment and construct the AWS
     clients and database handle this component uses."""
     # Required configuration — a missing variable raises KeyError up front.
     self.api_key = os.environ["INGEST_API_KEY"]
     self.deployment_stage = os.environ["DEPLOYMENT_STAGE"]
     self.api_host = os.environ["API_HOST"]
     # AWS service clients.
     self.batch_client = boto3.client("batch")
     self.ec2_client = boto3.client("ec2")
     self.lambda_client = boto3.client("lambda")
     # Database access handle.
     self.db = UploadDB()
コード例 #7
0
def health():
    """Health endpoint polled by the DCP-wide status monitoring system.

    Exercises the full path from the API gateway through the ECS pgbouncer
    to RDS by running a trivial query; returns HTTP 200 when it succeeds.
    """
    UploadDB().run_query("SELECT count(*) from upload_area;")
    return requests.codes.ok
コード例 #8
0
    def setUp(self):
        """Create a fresh UNLOCKED upload-area row for each test."""
        super().setUp()
        self.area_uuid = str(uuid.uuid4())
        self.upload_area = UploadArea(self.area_uuid)
        self.db = UploadDB()

        area_row = {
            "uuid": self.area_uuid,
            "status": "UNLOCKED",
            "bucket_name": self.upload_config.bucket_name,
        }
        self.db.create_pg_record("upload_area", area_row)
コード例 #9
0
    def test_add_upload_area_to_delete_sqs(self):
        """Queueing an area for deletion publishes its uuid to the delete
        queue and flips the DB status to DELETION_QUEUED."""
        area_uuid = self._create_area()

        UploadArea(area_uuid).add_upload_area_to_delete_sqs()

        queued = self.sqs.meta.client.receive_message(QueueUrl='delete_sqs_url')
        body = json.loads(queued['Messages'][0]['Body'])
        self.assertEqual(body['area_uuid'], area_uuid)
        area_record = UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')
        self.assertEqual(area_record['status'], "DELETION_QUEUED")
コード例 #10
0
    def test_upload_area_delete_over_timeout(self,
                                             mock_retrieve_lambda_timeout):
        """With no lambda time budget left, delete() defers by queueing the
        area (DELETION_QUEUED) instead of deleting inline."""
        area_uuid = self._create_area()
        self.upload_bucket.Object(f'{area_uuid}/test_file').put(Body="foo")
        # Simulate an exhausted lambda execution-time budget.
        mock_retrieve_lambda_timeout.return_value = 0

        UploadArea(area_uuid).delete()

        area_record = UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')
        self.assertEqual("DELETION_QUEUED", area_record["status"])
コード例 #11
0
    def test_delete_with_id_of_real_non_empty_upload_area(self):
        """DELETE on a non-empty area responds 202 Accepted and queues the
        deletion rather than performing it synchronously."""
        area_uuid = self._create_area()
        self.upload_bucket.Object(f'{area_uuid}/test_file').put(Body="foo")

        response = self.client.delete(f"/v1/area/{area_uuid}",
                                      headers=self.authentication_header)

        self.assertEqual(202, response.status_code)
        area_record = UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')
        self.assertEqual("DELETION_QUEUED", area_record["status"])
コード例 #12
0
    def test_locking_of_upload_area(self):
        """Lock then unlock an upload area through the API, verifying the DB
        status transitions UNLOCKED -> LOCKED -> UNLOCKED."""
        area_uuid = self._create_area()

        def area_status():
            # Re-read the upload_area row; the API calls mutate its status.
            return UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')["status"]

        self.assertEqual("UNLOCKED", area_status())

        response = self.client.post(f"/v1/area/{area_uuid}/lock",
                                    headers=self.authentication_header)
        self.assertEqual(204, response.status_code)
        self.assertEqual("LOCKED", area_status())

        response = self.client.delete(f"/v1/area/{area_uuid}/lock",
                                      headers=self.authentication_header)
        self.assertEqual(204, response.status_code)
        self.assertEqual("UNLOCKED", area_status())
コード例 #13
0
 def test_validated_status_file_validation(self, mock_format_and_send_notification):
     """Driving a validation through VALIDATING then VALIDATED stores the
     results payload, stamps both timestamps, notifies ingest exactly once
     with the file document, and exposes VALIDATED via the GET endpoint."""
     from datetime import datetime

     validation_id = str(uuid.uuid4())
     area_id = self._create_area()
     s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
     upload_area = UploadArea(area_id)
     uploaded_file = UploadedFile(upload_area, s3object=s3obj)
     validation_event = ValidationEvent(file_ids=[uploaded_file.db_id],
                                        validation_id=validation_id,
                                        job_id='12345',
                                        status="SCHEDULED",
                                        docker_image="test_docker_image")
     validation_event.create_record()
     data = {
         "status": "VALIDATING",
         "job_id": validation_event.job_id,
         "payload": uploaded_file.info()
     }
     response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                 headers=self.authentication_header,
                                 data=json.dumps(data))
     # The intermediate transition must succeed too; previously this
     # response was silently discarded.
     self.assertEqual(204, response.status_code)
     data = {
         "status": "VALIDATED",
         "job_id": validation_event.job_id,
         "payload": uploaded_file.info()
     }
     response = self.client.post(f"/v1/area/{area_id}/update_validation/{validation_id}",
                                 headers=self.authentication_header,
                                 data=json.dumps(data))
     self.assertEqual(204, response.status_code)
     mock_format_and_send_notification.assert_called_once_with({
         'upload_area_id': area_id,
         'name': 'foo.json',
         'size': 3,
         'last_modified': s3obj.last_modified.isoformat(),
         'content_type': "application/json",
         'url': f"s3://{self.upload_config.bucket_name}/{area_id}/foo.json",
         'checksums': {'s3_etag': '1', 'sha1': '2', 'sha256': '3', 'crc32c': '4'}
     })
     record = UploadDB().get_pg_record("validation", validation_id)
     self.assertEqual("VALIDATED", record["status"])
     self.assertEqual("test_docker_image", record["docker_image"])
     self.assertIsInstance(record.get("validation_started_at"), datetime)
     self.assertIsInstance(record.get("validation_ended_at"), datetime)
     self.assertEqual(uploaded_file.info(), record.get("results"))
     response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")
     validation_status = response.get_json()['validation_status']
     self.assertEqual(validation_status, "VALIDATED")
コード例 #14
0
    def test_create_with_unused_upload_area_uuid(self):
        """POST with a brand-new uuid creates the area: 201, an s3 uri in
        the body, and an UNLOCKED upload_area row in the DB."""
        area_uuid = str(uuid.uuid4())

        response = self.client.post(f"/v1/area/{area_uuid}",
                                    headers=self.authentication_header)

        self.assertEqual(201, response.status_code)
        expected_body = {'uri': f"s3://{self.upload_config.bucket_name}/{area_uuid}/"}
        self.assertEqual(expected_body, json.loads(response.data))

        area_record = UploadDB().get_pg_record("upload_area", area_uuid, column='uuid')
        self.assertEqual(area_uuid, area_record["uuid"])
        self.assertEqual(self.upload_config.bucket_name, area_record["bucket_name"])
        self.assertEqual("UNLOCKED", area_record["status"])
コード例 #15
0
    def test_schedule_validation__with_original_validation_id__retains_original_validation_id(self):
        """A re-validation request carrying original_validation_id keeps that
        id on the newly created validation record."""
        area_id = self._create_area()
        for filename in ('foo.json', 'foo2.json'):
            self.mock_upload_file_to_s3(area_id, filename)

        payload = {
            'validator_image': "humancellatlas/upload-validator-example",
            'files': ['foo.json', 'foo2.json'],
            'original_validation_id': '123456'
        }
        response = self.client.put(f"/v1/area/{area_id}/validate",
                                   headers=self.authentication_header,
                                   json=payload)

        self.assertEqual(200, response.status_code)
        validation_record = UploadDB().get_pg_record("validation", response.json['validation_id'])
        self.assertEqual(validation_record['status'], "SCHEDULING_QUEUED")
        self.assertEqual(validation_record['original_validation_id'], "123456")
コード例 #16
0
    def test_format_and_send_notification(self, mock_send_notification):
        """A delivered notification is persisted with its payload and the
        db id of the file that triggered it."""
        area_uuid = str(uuid.uuid4())
        upload_area = UploadArea(area_uuid)
        upload_area.update_or_create()
        upload_area._db_load()
        stored_file = upload_area.store_file("test_file_name", "test_file_content",
                                             "application/json; dcp-type=data")
        notifier = IngestNotifier("file_uploaded", file_id=stored_file.db_id)

        test_payload = {
            'names': "[test_file_name]",
            'upload_area_id': area_uuid
        }
        notification_id = notifier.format_and_send_notification(test_payload)

        notification_record = UploadDB().get_pg_record("notification", notification_id, column="id")
        self.assertEqual(notification_record['status'], "DELIVERED")
        self.assertEqual(notification_record['file_id'], stored_file.db_id)
        self.assertEqual(notification_record['payload'], test_payload)
コード例 #17
0
    def __init__(self):
        """Prepare the SQL queries and CloudWatch metric queries used by the
        upload-service health report posted to #upload-service in Slack."""
        self.env = os.environ['DEPLOYMENT_STAGE']
        self.db = UploadDB()
        logger.debug(
            f"Running a health check for {self.env}. Results will be posted in #upload-service"
        )
        # Slack webhook URL used to post the report.
        self.webhook = UploadConfig().slack_webhook

        # Jobs still marked CHECKSUMMING, created in the last 4 weeks.
        # NOTE(review): `updated_at > CURRENT_TIMESTAMP - interval '2 hours'`
        # selects jobs touched WITHIN the last 2 hours; for a "stale" count
        # one might expect `<` here — confirm intended semantics.
        self.stale_checksum_job_count_query = "SELECT COUNT(*) FROM checksum " \
                                              "WHERE status='CHECKSUMMING' " \
                                              "AND created_at > CURRENT_DATE - interval '4 weeks' " \
                                              "AND updated_at > CURRENT_TIMESTAMP - interval '2 hours'"
        # Same pattern for validations stuck in VALIDATING (see NOTE above the
        # checksum query about the `updated_at` direction).
        self.stale_validation_job_count_query = "SELECT COUNT(*) FROM validation " \
                                                "WHERE status='VALIDATING' " \
                                                "AND created_at > CURRENT_DATE - interval '4 weeks' " \
                                                "AND updated_at > CURRENT_TIMESTAMP - interval '2 hours'"
        # Checksum jobs still only SCHEDULED (never started), recent window.
        self.scheduled_checksum_job_count_query = "SELECT COUNT(*) FROM checksum " \
                                                  "WHERE status='SCHEDULED' " \
                                                  "AND created_at > CURRENT_DATE - interval '4 weeks' " \
                                                  "AND updated_at > CURRENT_TIMESTAMP - interval '2 hours'"
        # Validation jobs still only SCHEDULED (never started), recent window.
        self.scheduled_validation_job_count_query = "SELECT COUNT(*) FROM validation " \
                                                    "WHERE status='SCHEDULED' " \
                                                    "AND created_at > CURRENT_DATE - interval '4 weeks' " \
                                                    "AND updated_at > CURRENT_TIMESTAMP - interval '2 hours'"
        # Areas created in the last 4 weeks that were never deleted.
        self.undeleted_areas_count_query = "SELECT COUNT(*) FROM upload_area " \
                                           "WHERE created_at > CURRENT_DATE - interval '4 weeks' " \
                                           "AND status != 'DELETED'"
        # Checksum failures in the last 24 hours.
        self.failed_checksum_count_query = "SELECT COUNT(*) FROM checksum " \
                                           "WHERE status='FAILED' " \
                                           "AND updated_at >= NOW() - '1 day'::INTERVAL"
        # Validation failures in the last 24 hours.
        self.failed_validation_count_query = "SELECT COUNT(*) FROM validation " \
                                             "WHERE status='FAILED' " \
                                             "AND updated_at >= NOW() - '1 day'::INTERVAL"
        # CloudWatch GetMetricData queries for the pre-checksum dead-letter
        # queue (25-hour period, averaged).
        self.deadletter_metric_queries = [{
            'Id': 'visible_messages',
            'MetricStat': {
                'Metric': {
                    'Namespace':
                    'AWS/SQS',
                    'MetricName':
                    'ApproximateNumberOfMessagesVisible',
                    'Dimensions': [{
                        'Name':
                        'QueueName',
                        'Value':
                        f'dcp-upload-pre-csum-deadletter-queue-{self.env}'
                    }]
                },
                'Period': 90000,
                'Stat': 'Average'
            }
        }, {
            'Id': 'received_messages',
            'MetricStat': {
                'Metric': {
                    'Namespace':
                    'AWS/SQS',
                    'MetricName':
                    'NumberOfMessagesReceived',
                    'Dimensions': [{
                        'Name':
                        'QueueName',
                        'Value':
                        f'dcp-upload-pre-csum-deadletter-queue-{self.env}'
                    }]
                },
                'Period': 90000,
                'Stat': 'Average'
            }
        }]
        # CloudWatch error counts for the API and checksum-daemon lambdas
        # (25-hour period, summed).
        self.lambda_error_queries = [{
            'Id': 'upload_api_lambda_errors',
            'MetricStat': {
                'Metric': {
                    'Namespace':
                    'AWS/Lambda',
                    'MetricName':
                    'Errors',
                    'Dimensions': [{
                        'Name': 'FunctionName',
                        'Value': f'upload-api-{self.env}'
                    }]
                },
                'Period': 90000,
                'Stat': 'Sum'
            }
        }, {
            'Id': 'checksum_daemon_lambda_errors',
            'MetricStat': {
                'Metric': {
                    'Namespace':
                    'AWS/Lambda',
                    'MetricName':
                    'Errors',
                    'Dimensions': [{
                        'Name': 'FunctionName',
                        'Value': f'dcp-upload-csum-{self.env}'
                    }]
                },
                'Period': 90000,
                'Stat': 'Sum'
            }
        }]