Ejemplo n.º 1
0
    def test_post_checksum__for_an_obj_without_tags__updates_db_but_and_does_not_notify_ingest(
            self, mock_fasn):
        # Scenario: the file is uploaded with an empty checksums mapping and a
        # "CHECKSUMMED" update is then posted for it.  The checksum record must
        # move to CHECKSUMMED while the notification mock stays untouched.
        # NOTE(review): mock_fasn is presumably injected by a patch decorator
        # that is outside this view -- confirm what it replaces.
        checksum_id = str(uuid.uuid4())
        area = UploadArea(self.create_upload_area().uuid)
        s3_object = self.mock_upload_file_to_s3(area.uuid, 'foo.json', checksums={})
        file_under_test = UploadedFile(area, s3object=s3_object)

        # Seed a SCHEDULED checksum record for the endpoint to update.
        event = ChecksumEvent(file_id=file_under_test.db_id,
                              checksum_id=checksum_id,
                              job_id='12345',
                              status="SCHEDULED")
        event.create_record()

        endpoint = f"/v1/area/{area.uuid}/update_checksum/{checksum_id}"
        request_body = {
            "status": "CHECKSUMMED",
            "job_id": event.job_id,
            "payload": file_under_test.info(),
        }
        response = self.client.post(endpoint, json=request_body)

        self.assertEqual(204, response.status_code)

        # The stored checksum record reflects the posted status.
        checksum_query = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id)
        stored_checksum = checksum_query.one()
        self.assertEqual("CHECKSUMMED", stored_checksum.status)

        # No notification may have been sent.
        mock_fasn.assert_not_called()
Ejemplo n.º 2
0
    def test_post_checksum__with_a_checksumming_payload__updates_db_record(self, mock_format_and_send_notification):
        # Scenario: an authenticated "CHECKSUMMING" update is posted for a
        # SCHEDULED checksum record.  The record's status must change and the
        # notification mock must not be called.
        checksum_id = str(uuid.uuid4())
        area = UploadArea(self.create_upload_area().uuid)
        s3_object = self.mock_upload_file_to_s3(area.uuid, 'foo.json')
        file_under_test = UploadedFile(area, s3object=s3_object)

        # Seed a SCHEDULED checksum record for the endpoint to update.
        event = ChecksumEvent(file_id=file_under_test.db_id,
                              checksum_id=checksum_id,
                              job_id='12345',
                              status="SCHEDULED")
        event.create_record()

        endpoint = f"/v1/area/{area.uuid}/update_checksum/{checksum_id}"
        request_body = {
            "status": "CHECKSUMMING",
            "job_id": event.job_id,
            "payload": file_under_test.info(),
        }
        response = self.client.post(endpoint,
                                    headers=self.authentication_header,
                                    json=request_body)

        self.assertEqual(204, response.status_code)

        # The stored checksum record reflects the posted status.
        checksum_query = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id)
        stored_checksum = checksum_query.one()
        self.assertEqual("CHECKSUMMING", stored_checksum.status)

        mock_format_and_send_notification.assert_not_called()
Ejemplo n.º 3
0
    def __init__(self, argv):
        """Run the checksumming workflow for a single S3 object.

        Hands ``argv`` to ``_parse_args``, wraps the targeted S3 object in a
        ``DssChecksums`` and builds a ``ChecksumEvent`` from the
        ``CHECKSUM_ID`` and ``AWS_BATCH_JOB_ID`` environment variables.
        Progress is then reported through ``_update_checksum_event``:

        * ``ABORTED`` if the object's contents are not what is expected;
        * ``CHECKSUMMED`` immediately if checksums are already present;
        * ``CHECKSUMMING`` followed by ``CHECKSUMMED`` around computing the
          checksums and saving them as tags on the S3 object.

        :param argv: command-line arguments, passed to ``_parse_args``.
        :raises KeyError: if ``CHECKSUM_ID`` or ``AWS_BATCH_JOB_ID`` is not
            set in the environment.
        """
        # Placeholders; presumably filled in by _parse_args below (its body
        # is not visible here -- confirm).
        self.bucket_name = self.s3_object_key = self.upload_area_id = self.file_name = None

        # AWS Secrets are not available to batch jobs, use environment
        UploadConfig.use_env = True
        self._parse_args(argv)

        target_object = boto3.resource('s3').Bucket(self.bucket_name).Object(self.s3_object_key)
        self.checksums = DssChecksums(target_object)

        environment = os.environ
        self.checksum_event = ChecksumEvent(checksum_id=environment['CHECKSUM_ID'],
                                            job_id=environment['AWS_BATCH_JOB_ID'])

        # Object has been overwritten with different contents.  Abort.
        if self._object_contents_are_not_what_we_expect(target_object):
            self._update_checksum_event(status="ABORTED")
            return

        # Nothing to compute when the checksums are already there.
        if self.checksums.are_present():
            logger.info(f"File {self.s3_object_key} is already checksummed.")
            self._update_checksum_event(status="CHECKSUMMED")
            return

        logger.info(f"Checksumming {self.s3_object_key}...")
        self._update_checksum_event(status="CHECKSUMMING")
        self.checksums.compute(report_progress=True)
        self.checksums.save_as_tags_on_s3_object()
        self._update_checksum_event(status="CHECKSUMMED")
        logger.info(
            f"Checksums {dict(self.checksums)} used to tag file {self.s3_object_key}"
        )
Ejemplo n.º 4
0
    def test_get_checksum__for_a_file_with_checksum_records__returns_the_most_recent_record_status(self, mock_fasn):
        # Scenario: a file has one SCHEDULED checksum record.  The checksum
        # endpoint for that file must report that status together with the
        # file's checksums.
        checksum_id = str(uuid.uuid4())
        area = UploadArea(self.create_upload_area().uuid)
        s3_object = self.mock_upload_file_to_s3(area.uuid, 'foo.json')
        file_under_test = UploadedFile(area, s3object=s3_object)

        # The event object is only needed to persist its record.
        ChecksumEvent(file_id=file_under_test.db_id,
                      checksum_id=checksum_id,
                      job_id='12345',
                      status="SCHEDULED").create_record()

        endpoint = f"/v1/area/{area.uuid}/{file_under_test.name}/checksum"
        reported = self.client.get(endpoint).get_json()

        self.assertEqual("SCHEDULED", reported['checksum_status'])
        self.assertEqual(file_under_test.checksums, reported['checksums'])
    def test_update_event_with_checksum_event(self, mock_format_and_send_notification):
        # Scenario: a SCHEDULED checksum event is pushed through update_event
        # twice, first as CHECKSUMMING and then as CHECKSUMMED.  After each
        # update the "checksum" record is re-read and its status and
        # started/ended timestamps are checked.
        #
        # Timestamp types are verified with assertIsInstance / assertIsNone
        # rather than by comparing str(type(...)) against a literal, so a
        # failure names the offending value instead of a mismatched string.
        import datetime

        checksum_id = str(uuid.uuid4())
        area_uuid = self._create_area()
        s3obj = self.mock_upload_file_to_s3(area_uuid, 'foo.json')
        upload_area = UploadArea(area_uuid)
        uploaded_file = UploadedFile(upload_area, s3object=s3obj)
        checksum_event = ChecksumEvent(file_id=uploaded_file.db_id,
                                       checksum_id=checksum_id,
                                       job_id='12345',
                                       status="SCHEDULED")
        checksum_event.create_record()

        # First transition: checksumming has started but not finished.
        checksum_event.status = "CHECKSUMMING"
        response = update_event(checksum_event, uploaded_file.info(), self.client)
        self.assertEqual(204, response.status_code)
        record = UploadDB().get_pg_record("checksum", checksum_id)
        self.assertEqual("CHECKSUMMING", record["status"])
        self.assertIsInstance(record.get("checksum_started_at"), datetime.datetime)
        self.assertIsNone(record["checksum_ended_at"])

        # Second transition: checksumming is complete, both timestamps set.
        checksum_event.status = "CHECKSUMMED"
        response = update_event(checksum_event, uploaded_file.info(), self.client)
        self.assertEqual(204, response.status_code)
        record = UploadDB().get_pg_record("checksum", checksum_id)
        self.assertEqual("CHECKSUMMED", record["status"])
        self.assertIsInstance(record.get("checksum_started_at"), datetime.datetime)
        self.assertIsInstance(record.get("checksum_ended_at"), datetime.datetime)
Ejemplo n.º 6
0
    def test_post_checksum__with_a_checksummed_payload__updates_db_records_and_notifies_ingest(
            self, mock_fasn):
        # Scenario: a "CHECKSUMMED" update carrying a full set of checksums is
        # posted for a SCHEDULED checksum record.  Both the checksum record and
        # the file record must be updated, and the notification mock called.
        # NOTE(review): mock_fasn is presumably injected by a patch decorator
        # that is outside this view -- confirm what it replaces.
        checksum_id = str(uuid.uuid4())
        area = UploadArea(self.create_upload_area().uuid)
        s3_object = self.mock_upload_file_to_s3(area.uuid, 'foo.json')
        file_under_test = UploadedFile(area, s3object=s3_object)

        # Seed a SCHEDULED checksum record for the endpoint to update.
        event = ChecksumEvent(file_id=file_under_test.db_id,
                              checksum_id=checksum_id,
                              job_id='12345',
                              status="SCHEDULED")
        event.create_record()

        expected_checksums = {'s3_etag': '1', 'sha1': '2', 'sha256': '3', 'crc32c': '4'}
        endpoint = f"/v1/area/{area.uuid}/update_checksum/{checksum_id}"
        request_body = {
            "status": "CHECKSUMMED",
            "job_id": event.job_id,
            "payload": {
                "upload_area_id": area.db_id,
                "name": file_under_test.name,
                "checksums": expected_checksums,
            },
        }
        response = self.client.post(endpoint, json=request_body)

        self.assertEqual(204, response.status_code)

        # Checksum record status should be updated
        checksum_query = self.db.query(DbChecksum).filter(DbChecksum.id == checksum_id)
        stored_checksum = checksum_query.one()
        self.assertEqual("CHECKSUMMED", stored_checksum.status)

        # Checksums should be stored in File record
        file_query = self.db.query(DbFile).filter(DbFile.id == file_under_test.db_id)
        stored_file = file_query.one()
        self.assertEqual(expected_checksums, stored_file.checksums)

        # Ingest should be notified
        mock_fasn.assert_called()
Ejemplo n.º 7
0
    def test_checksum_statuses_for_upload_area(self, mock_format_and_send_notification):
        # Scenario: five files are uploaded to one area; the first three get a
        # checksum record in a distinct status and the last two get none.  The
        # area's checksums endpoint must return the per-status counts below.
        #
        # The final check uses self.assertEqual (as the sibling tests do)
        # instead of a bare assert, so it is not stripped under ``python -O``
        # and a failure shows a dict diff.
        db_area = self.create_upload_area()
        upload_area = UploadArea(db_area.uuid)

        checksum_ids = [str(uuid.uuid4()) for _ in range(3)]

        # Upload every file first, then register each one as an UploadedFile.
        s3_objects = [
            self.mock_upload_file_to_s3(upload_area.uuid, f'foo{index}.json')
            for index in range(1, 6)
        ]
        uploaded_files = [UploadedFile(upload_area, s3object=s3_object) for s3_object in s3_objects]

        # zip() stops after the three job/status pairs, so the fourth and
        # fifth files deliberately get no checksum record.
        job_ids = ('123', '456', '789')
        statuses = ("SCHEDULED", "CHECKSUMMING", "CHECKSUMMED")
        checksum_events = [
            ChecksumEvent(file_id=uploaded_file.db_id, checksum_id=checksum_id, job_id=job_id, status=status)
            for uploaded_file, checksum_id, job_id, status in zip(uploaded_files, checksum_ids, job_ids, statuses)
        ]
        for checksum_event in checksum_events:
            checksum_event.create_record()

        response = self.client.get(f"/v1/area/{upload_area.uuid}/checksums")
        expected_data = {
            'CHECKSUMMED': 1,
            'CHECKSUMMING': 1,
            'CHECKSUMMING_UNSCHEDULED': 2,
            'SCHEDULED': 1,
            'TOTAL_NUM_FILES': 5
        }

        self.assertEqual(expected_data, response.get_json())