def test_validating_status_file_validation(self, mock_format_and_send_notification):
     """Check that a VALIDATING update is persisted and reported without a notification.

     Creates a SCHEDULED validation record for an uploaded file, POSTs a
     VALIDATING status to the update_validation endpoint, then verifies the
     stored record, the status returned by the file's validate endpoint, and
     that the notification mock was never called.
     """
     # Function-scope import so the type assertion below does not depend on
     # how the enclosing module happens to import datetime.
     from datetime import datetime

     validation_id = str(uuid.uuid4())
     orig_val_id = str(uuid.uuid4())
     area_id = self._create_area()
     s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
     upload_area = UploadArea(area_id)
     uploaded_file = UploadedFile(upload_area, s3object=s3obj)
     validation_event = ValidationEvent(
         file_ids=[uploaded_file.db_id],
         validation_id=validation_id,
         job_id='12345',
         status="SCHEDULED",
         docker_image="test_docker_image",
         original_validation_id=orig_val_id,
     )
     validation_event.create_record()

     data = {
         "status": "VALIDATING",
         "job_id": validation_event.job_id,
         "payload": uploaded_file.info()
     }
     response = self.client.post(
         f"/v1/area/{area_id}/update_validation/{validation_id}",
         headers=self.authentication_header,
         data=json.dumps(data),
     )
     self.assertEqual(204, response.status_code)

     record = UploadDB().get_pg_record("validation", validation_id)
     self.assertEqual("test_docker_image", record["docker_image"])
     self.assertEqual(validation_id, record["id"])
     self.assertEqual(orig_val_id, record["original_validation_id"])
     self.assertEqual("VALIDATING", record["status"])
     # Validation has started but not finished, so only the start timestamp is set.
     self.assertIsInstance(record.get("validation_started_at"), datetime)
     self.assertIsNone(record["validation_ended_at"])
     self.assertIsNone(record.get("results"))

     response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")
     validation_status = response.get_json()['validation_status']
     self.assertEqual(validation_status, "VALIDATING")
     mock_format_and_send_notification.assert_not_called()
Example #2
0
    def test_post_checksum__with_a_checksumming_payload__updates_db_record(self, mock_format_and_send_notification):
        """A CHECKSUMMING update must change the checksum row's status and send no notification."""
        checksum_id = str(uuid.uuid4())
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)
        s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)

        # Start from a SCHEDULED checksum record for the uploaded file.
        event = ChecksumEvent(
            file_id=uploaded_file.db_id,
            checksum_id=checksum_id,
            job_id='12345',
            status="SCHEDULED",
        )
        event.create_record()

        response = self.client.post(
            f"/v1/area/{upload_area.uuid}/update_checksum/{checksum_id}",
            headers=self.authentication_header,
            json={
                "status": "CHECKSUMMING",
                "job_id": event.job_id,
                "payload": uploaded_file.info(),
            },
        )
        self.assertEqual(204, response.status_code)

        stored = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id).one()
        self.assertEqual("CHECKSUMMING", stored.status)

        mock_format_and_send_notification.assert_not_called()
    def test_update_event_with_validation_event(self, mock_format_and_send_notification):
        """Drive a validation event through VALIDATING and VALIDATED via update_event.

        After each call the persisted validation record is re-read and its
        status, timestamps and results are checked.
        """
        # Function-scope import so the type assertions below do not depend on
        # how the enclosing module happens to import datetime.
        from datetime import datetime

        validation_id = str(uuid.uuid4())
        area_id = self._create_area()
        s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
        upload_area = UploadArea(area_id)
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)
        validation_event = ValidationEvent(
            file_ids=[uploaded_file.db_id],
            validation_id=validation_id,
            job_id='12345',
            status="SCHEDULED",
        )
        validation_event.create_record()

        # First transition: SCHEDULED -> VALIDATING (started, not yet ended).
        validation_event.status = "VALIDATING"
        response = update_event(validation_event, uploaded_file.info(), self.client)
        self.assertEqual(204, response.status_code)
        record = UploadDB().get_pg_record("validation", validation_id)
        self.assertEqual("VALIDATING", record["status"])
        self.assertIsInstance(record.get("validation_started_at"), datetime)
        self.assertIsNone(record["validation_ended_at"])
        self.assertIsNone(record.get("results"))

        # Second transition: VALIDATING -> VALIDATED (ended, results stored).
        validation_event.status = "VALIDATED"
        response = update_event(validation_event, uploaded_file.info(), self.client)
        self.assertEqual(204, response.status_code)
        record = UploadDB().get_pg_record("validation", validation_id)
        self.assertEqual("VALIDATED", record["status"])
        self.assertIsInstance(record.get("validation_started_at"), datetime)
        self.assertIsInstance(record.get("validation_ended_at"), datetime)
        self.assertEqual(uploaded_file.info(), record.get("results"))
    def test_update_event_with_checksum_event(self, mock_format_and_send_notification):
        """Drive a checksum event through CHECKSUMMING and CHECKSUMMED via update_event.

        After each call the persisted checksum record is re-read and its
        status and start/end timestamps are checked.
        """
        # Function-scope import so the type assertions below do not depend on
        # how the enclosing module happens to import datetime.
        from datetime import datetime

        checksum_id = str(uuid.uuid4())
        area_uuid = self._create_area()
        s3obj = self.mock_upload_file_to_s3(area_uuid, 'foo.json')
        upload_area = UploadArea(area_uuid)
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)
        checksum_event = ChecksumEvent(
            file_id=uploaded_file.db_id,
            checksum_id=checksum_id,
            job_id='12345',
            status="SCHEDULED",
        )
        checksum_event.create_record()

        # First transition: SCHEDULED -> CHECKSUMMING (started, not yet ended).
        checksum_event.status = "CHECKSUMMING"
        response = update_event(checksum_event, uploaded_file.info(), self.client)
        self.assertEqual(204, response.status_code)
        record = UploadDB().get_pg_record("checksum", checksum_id)
        self.assertEqual("CHECKSUMMING", record["status"])
        self.assertIsInstance(record.get("checksum_started_at"), datetime)
        self.assertIsNone(record["checksum_ended_at"])

        # Second transition: CHECKSUMMING -> CHECKSUMMED (both timestamps set).
        checksum_event.status = "CHECKSUMMED"
        response = update_event(checksum_event, uploaded_file.info(), self.client)
        self.assertEqual(204, response.status_code)
        record = UploadDB().get_pg_record("checksum", checksum_id)
        self.assertEqual("CHECKSUMMED", record["status"])
        self.assertIsInstance(record.get("checksum_started_at"), datetime)
        self.assertIsInstance(record.get("checksum_ended_at"), datetime)
Example #5
0
    def test_post_checksum__for_an_obj_without_tags__updates_db_but_and_does_not_notify_ingest(
            self, mock_fasn):
        """A CHECKSUMMED update for a file uploaded with empty checksums updates the row only.

        The checksum record must end up CHECKSUMMED while the notification
        mock stays uncalled.
        """
        checksum_id = str(uuid.uuid4())
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)
        # Upload the object with an empty checksums mapping.
        s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json', checksums={})
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)

        event = ChecksumEvent(
            file_id=uploaded_file.db_id,
            checksum_id=checksum_id,
            job_id='12345',
            status="SCHEDULED",
        )
        event.create_record()

        response = self.client.post(
            f"/v1/area/{upload_area.uuid}/update_checksum/{checksum_id}",
            json={
                "status": "CHECKSUMMED",
                "job_id": event.job_id,
                "payload": uploaded_file.info(),
            },
        )
        self.assertEqual(204, response.status_code)

        stored = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id).one()
        self.assertEqual("CHECKSUMMED", stored.status)

        mock_fasn.assert_not_called()
Example #6
0
    def test_checksum_statuses_for_upload_area(
            self, mock_format_and_send_notification):
        """Check the per-status checksum counts returned for an upload area.

        Five files are uploaded. The first three get checksum records in the
        SCHEDULED, CHECKSUMMING and CHECKSUMMED states; the last two get no
        checksum record, and the endpoint is expected to count them under
        CHECKSUMMING_UNSCHEDULED.
        """
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)

        # Upload every object first, then wrap each one in an UploadedFile.
        file_names = ('foo1.json', 'foo2.json', 'foo3.json', 'foo4.json', 'foo5.json')
        s3_objects = [
            self.mock_upload_file_to_s3(upload_area.uuid, file_name)
            for file_name in file_names
        ]
        uploaded_files = [
            UploadedFile(upload_area, s3object=s3_object)
            for s3_object in s3_objects
        ]

        # Only the first three files receive a checksum record (zip stops at
        # the shorter sequence), leaving two files without one.
        job_specs = (
            ('123', "SCHEDULED"),
            ('456', "CHECKSUMMING"),
            ('789', "CHECKSUMMED"),
        )
        checksum_events = [
            ChecksumEvent(file_id=uploaded_file.db_id,
                          checksum_id=str(uuid.uuid4()),
                          job_id=job_id,
                          status=status)
            for uploaded_file, (job_id, status) in zip(uploaded_files, job_specs)
        ]
        for checksum_event in checksum_events:
            checksum_event.create_record()

        response = self.client.get(f"/v1/area/{upload_area.uuid}/checksums")
        expected_data = {
            'CHECKSUMMED': 1,
            'CHECKSUMMING': 1,
            'CHECKSUMMING_UNSCHEDULED': 2,
            'SCHEDULED': 1,
            'TOTAL_NUM_FILES': 5
        }

        # assertEqual rather than a bare assert: it is not stripped under
        # ``python -O`` and it prints a dict diff on failure.
        self.assertEqual(expected_data, response.get_json())
 def test_unscheduled_status_file_validation(self, mock_format_and_send_notification):
     """A file with no validation record must report validation status UNSCHEDULED."""
     area_id = self._create_area()
     s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
     # Instantiated only for its side effects; the instance itself is unused.
     UploadedFile(UploadArea(area_id), s3object=s3obj)

     body = self.client.get(f"/v1/area/{area_id}/foo.json/validate").get_json()

     self.assertEqual(body['validation_status'], "UNSCHEDULED")
Example #8
0
    def test_validation_statuses_for_upload_area(
            self, mock_format_and_send_notification):
        """Check the per-status validation counts returned for an upload area.

        Four files each get one validation record: one SCHEDULED, two
        VALIDATING and one VALIDATED (with results set to 'VALID').
        """
        area_id = self._create_area()
        upload_area = UploadArea(area_id)

        validation_ids = [str(uuid.uuid4()) for _ in range(4)]

        # Upload all objects before wrapping any of them in an UploadedFile.
        s3_objects = [
            self.mock_upload_file_to_s3(area_id, file_name)
            for file_name in ('foo1.json', 'foo2.json', 'foo3.json', 'foo4.json')
        ]
        uploaded_files = [
            UploadedFile(upload_area, s3object=s3_object)
            for s3_object in s3_objects
        ]

        job_specs = (
            ('12345', "SCHEDULED"),
            ('23456', "VALIDATING"),
            ('34567', "VALIDATED"),
            ('45678', "VALIDATING"),
        )
        events = [
            ValidationEvent(file_ids=[uploaded_file.db_id],
                            validation_id=validation_id,
                            job_id=job_id,
                            status=status)
            for uploaded_file, validation_id, (job_id, status)
            in zip(uploaded_files, validation_ids, job_specs)
        ]
        # The VALIDATED event (third one) carries results before it is stored.
        events[2].results = 'VALID'
        for event in events:
            event.create_record()

        response = self.client.get(f"/v1/area/{area_id}/validations")

        expected_data = {'SCHEDULED': 1, 'VALIDATED': 1, 'VALIDATING': 2}
        self.assertEqual(expected_data, response.get_json())
Example #9
0
    def test_init__doesnt_create_db_record_if_one_already_exists(self):
        """Constructing an UploadedFile for an existing file record must not change the DbFile row count."""
        s3object = self.create_s3_object(f"{self.upload_area_id}/foo")
        self.create_file_record(s3object)

        def file_row_count():
            # Total number of DbFile rows at the time of the call.
            return self.db.query(DbFile).count()

        record_count_before = file_row_count()

        UploadedFile(upload_area=self.upload_area, s3object=s3object)

        self.assertEqual(record_count_before, file_row_count())
Example #10
0
    def test_get_checksum__for_a_file_with_no_checksum_records__returns_status_unscheduled(self, mock_fasn):
        """A file that has no checksum record must report checksum status UNSCHEDULED."""
        upload_area = UploadArea(self.create_upload_area().uuid)
        s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
        # Instantiating UploadedFile creates the file record; the object is not needed.
        UploadedFile(upload_area, s3object=s3obj)

        response = self.client.get(f"/v1/area/{upload_area.uuid}/foo.json/checksum")
        body = response.get_json()

        self.assertEqual("UNSCHEDULED", body['checksum_status'])
Example #11
0
    def test_init__doesnt_create_db_record_if_one_already_exists(self):
        """Constructing an UploadedFile for an existing record must not add rows for its S3 key.

        A randomised file name is used and rows are counted for that key
        only, so other DbFile rows do not affect the comparison.
        """
        filename = f"file-{random.randint(0, 999999999)}"
        s3_key = f"{self.upload_area_id}/{filename}"
        s3object = self.create_s3_object(s3_key)
        self.create_file_record(s3object)

        def rows_for_key():
            # Number of DbFile rows stored under this test's S3 key.
            return self.db.query(DbFile).filter(DbFile.s3_key == s3_key).count()

        record_count_before = rows_for_key()

        UploadedFile(upload_area=self.upload_area, s3object=s3object)

        record_count_after = rows_for_key()
        self.assertEqual(record_count_before, record_count_after)
 def test_scheduled_status_file_validation(self, mock_format_and_send_notification):
     """A file whose only validation record is SCHEDULED must report status SCHEDULED."""
     validation_id = str(uuid.uuid4())
     area_id = self._create_area()
     s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
     uploaded_file = UploadedFile(UploadArea(area_id), s3object=s3obj)

     # Persist a single SCHEDULED validation record for the file.
     ValidationEvent(
         file_ids=[uploaded_file.db_id],
         validation_id=validation_id,
         job_id='12345',
         status="SCHEDULED",
     ).create_record()

     body = self.client.get(f"/v1/area/{area_id}/foo.json/validate").get_json()

     self.assertEqual(body['validation_status'], "SCHEDULED")
Example #13
0
    def test_info(self):
        """UploadedFile.info() must return the expected dict built from the S3 object and file record."""
        test_file = FixtureFile.factory("foo")
        s3object = self.create_s3_object(f"{self.upload_area_id}/foo", content=test_file.contents)
        file_record = self.create_file_record(s3object, checksums=test_file.checksums)
        uf = UploadedFile(self.upload_area, s3object=s3object)

        # Expected value is assembled before info() is called.
        expected_info = {
            'upload_area_id': self.upload_area.uuid,
            'name': file_record.name,
            'size': s3object.content_length,
            'content_type': s3object.content_type,
            'url': f"s3://{s3object.bucket_name}/{s3object.key}",
            'checksums': test_file.checksums,
            'last_modified': s3object.last_modified.isoformat()
        }

        self.assertEqual(expected_info, uf.info())
Example #14
0
    def test_init__given_existing_entities__initializes_properties_correctly(self):
        """An UploadedFile built over an existing S3 object and file record exposes their values."""
        s3object = self.create_s3_object(f"{self.upload_area_id}/foo")
        file_record = self.create_file_record(s3object)

        uf = UploadedFile(self.upload_area, s3object=s3object)

        # References to the collaborating objects are kept as given.
        self.assertEqual(s3object, uf.s3object)
        self.assertEqual(self.upload_area, uf.upload_area)

        # Persisted properties match the file record and the S3 object.
        self.assertEqual(file_record.id, uf.db_id)
        self.assertEqual(s3object.key, uf.s3_key)
        # The expected etag is the S3 e_tag with its double quotes stripped.
        expected_etag = s3object.e_tag.strip('\"')
        self.assertEqual(expected_etag, uf.s3_etag)
        self.assertEqual(self.upload_area.db_id, uf._properties['upload_area_id'])
        self.assertEqual(file_record.name, uf.name)
        self.assertEqual(s3object.content_length, uf.size)
 def test_validated_status_file_validation(self, mock_format_and_send_notification):
     """Check that a VALIDATED update is persisted, reported, and triggers one notification.

     A SCHEDULED validation record is moved to VALIDATING and then to
     VALIDATED through the update_validation endpoint. The test then checks
     the notification payload, the stored record, and the status returned by
     the file's validate endpoint.
     """
     # Function-scope import so the type assertions below do not depend on
     # how the enclosing module happens to import datetime.
     from datetime import datetime

     validation_id = str(uuid.uuid4())
     area_id = self._create_area()
     s3obj = self.mock_upload_file_to_s3(area_id, 'foo.json')
     upload_area = UploadArea(area_id)
     uploaded_file = UploadedFile(upload_area, s3object=s3obj)
     validation_event = ValidationEvent(
         file_ids=[uploaded_file.db_id],
         validation_id=validation_id,
         job_id='12345',
         status="SCHEDULED",
         docker_image="test_docker_image",
     )
     validation_event.create_record()
     update_url = f"/v1/area/{area_id}/update_validation/{validation_id}"

     data = {
         "status": "VALIDATING",
         "job_id": validation_event.job_id,
         "payload": uploaded_file.info()
     }
     response = self.client.post(update_url,
                                 headers=self.authentication_header,
                                 data=json.dumps(data))
     # Check the intermediate update too; previously this response was
     # overwritten unchecked, hiding a failure of the VALIDATING step.
     self.assertEqual(204, response.status_code)

     data = {
         "status": "VALIDATED",
         "job_id": validation_event.job_id,
         "payload": uploaded_file.info()
     }
     response = self.client.post(update_url,
                                 headers=self.authentication_header,
                                 data=json.dumps(data))
     self.assertEqual(204, response.status_code)

     mock_format_and_send_notification.assert_called_once_with({
         'upload_area_id': area_id,
         'name': 'foo.json',
         'size': 3,
         'last_modified': s3obj.last_modified.isoformat(),
         'content_type': "application/json",
         'url': f"s3://{self.upload_config.bucket_name}/{area_id}/foo.json",
         'checksums': {'s3_etag': '1', 'sha1': '2', 'sha256': '3', 'crc32c': '4'}
     })

     record = UploadDB().get_pg_record("validation", validation_id)
     self.assertEqual("VALIDATED", record["status"])
     self.assertEqual("test_docker_image", record["docker_image"])
     self.assertIsInstance(record.get("validation_started_at"), datetime)
     self.assertIsInstance(record.get("validation_ended_at"), datetime)
     self.assertEqual(uploaded_file.info(), record.get("results"))

     response = self.client.get(f"/v1/area/{area_id}/foo.json/validate")
     validation_status = response.get_json()['validation_status']
     self.assertEqual(validation_status, "VALIDATED")
Example #16
0
    def test_get_checksum__for_a_file_with_checksum_records__returns_the_most_recent_record_status(self, mock_fasn):
        """The checksum endpoint must return the stored record's status and the file's checksums."""
        checksum_id = str(uuid.uuid4())
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)
        s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)

        # A single SCHEDULED checksum record exists for the file.
        ChecksumEvent(
            file_id=uploaded_file.db_id,
            checksum_id=checksum_id,
            job_id='12345',
            status="SCHEDULED",
        ).create_record()

        response = self.client.get(f"/v1/area/{upload_area.uuid}/{uploaded_file.name}/checksum")
        info = response.get_json()

        self.assertEqual("SCHEDULED", info['checksum_status'])
        self.assertEqual(uploaded_file.checksums, info['checksums'])
Example #17
0
    def test_init__when_no_db_record_exists__creates_a_db_record(self):
        """Constructing an UploadedFile with no matching DbFile row must create one.

        The row is looked up by S3 key and quote-stripped etag: the lookup
        must raise NoResultFound before construction and succeed afterwards.
        """
        s3object = self.create_s3_object(f"{self.upload_area_id}/foo")

        def fetch_file_record():
            # Look up the single DbFile row matching this object's key and etag.
            return self.db.query(DbFile).filter(
                DbFile.s3_key == s3object.key,
                DbFile.s3_etag == s3object.e_tag.strip('\"')).one()

        with self.assertRaises(NoResultFound):
            fetch_file_record()

        uf = UploadedFile(upload_area=self.upload_area, s3object=s3object)

        record = fetch_file_record()
        self.assertEqual(record.id, uf.db_id)
        self.assertEqual(s3object.key, record.s3_key)
        self.assertEqual("foo", record.name)
        self.assertEqual(s3object.e_tag.strip('\"'), record.s3_etag)
        self.assertEqual(s3object.content_length, record.size)
        self.assertEqual(self.upload_area.db_id, record.upload_area_id)
Example #18
0
    def test_post_checksum__with_a_checksummed_payload__updates_db_records_and_notifies_ingest(
            self, mock_fasn):
        """A CHECKSUMMED update must update the checksum row, store checksums on the file, and notify.

        The payload is built by hand (area id, file name, checksums) rather
        than taken from uploaded_file.info().
        """
        checksum_id = str(uuid.uuid4())
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)
        s3obj = self.mock_upload_file_to_s3(upload_area.uuid, 'foo.json')
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)

        event = ChecksumEvent(
            file_id=uploaded_file.db_id,
            checksum_id=checksum_id,
            job_id='12345',
            status="SCHEDULED",
        )
        event.create_record()

        checksums = {'s3_etag': '1', 'sha1': '2', 'sha256': '3', 'crc32c': '4'}
        response = self.client.post(
            f"/v1/area/{upload_area.uuid}/update_checksum/{checksum_id}",
            json={
                "status": "CHECKSUMMED",
                "job_id": event.job_id,
                "payload": {
                    "upload_area_id": upload_area.db_id,
                    "name": uploaded_file.name,
                    "checksums": checksums,
                },
            },
        )
        self.assertEqual(204, response.status_code)

        # The checksum record's status is updated.
        stored_checksum = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id).one()
        self.assertEqual("CHECKSUMMED", stored_checksum.status)

        # The posted checksums are stored on the file record.
        stored_file = self.db.query(DbFile).filter(DbFile.id == uploaded_file.db_id).one()
        self.assertEqual(checksums, stored_file.checksums)

        # The notification function was invoked.
        mock_fasn.assert_called()