def batch_file() -> BatchFile:
    batch = BatchFile(
        index=1,
        keys=[
            TemporaryExposureKey(
                key_data=base64.b64encode("first_key".encode("utf-8")).decode("utf-8"),
                transmission_risk_level=TransmissionRiskLevel.low,
                rolling_start_number=1,
            ),
            TemporaryExposureKey(
                key_data=base64.b64encode("second_key".encode("utf-8")).decode("utf-8"),
                transmission_risk_level=TransmissionRiskLevel.low,
                rolling_start_number=2,
            ),
            TemporaryExposureKey(
                key_data=base64.b64encode("third_key".encode("utf-8")).decode("utf-8"),
                transmission_risk_level=TransmissionRiskLevel.highest,
                rolling_start_number=3,
            ),
        ],
        period_start=datetime.utcnow() - timedelta(days=1),
        period_end=datetime.utcnow(),
        sub_batch_count=2,
        sub_batch_index=1,
        client_content=b"this_is_a_zip_file",
    )
    batch.save()
    return batch
def test_get_latest_info(batch_files: List[BatchFile]) -> None:
    info = BatchFile.get_latest_info()
    assert info
    last_period, last_index = info
    assert (last_period - datetime.utcnow()).total_seconds() < 1
    assert last_index == 10
def batch_files() -> List[BatchFile]:
    num_batches = 10
    start_datetime = datetime.utcnow() - timedelta(days=num_batches - 1)
    batches = []
    for i in range(num_batches):
        with freeze_time(start_datetime + timedelta(days=i)):
            batches.append(
                BatchFile(
                    index=i + 1,
                    keys=[
                        TemporaryExposureKey(
                            key_data=generate_random_key_data(),
                            transmission_risk_level=TransmissionRiskLevel.highest,
                            rolling_start_number=int(
                                datetime.utcnow().timestamp()
                                / timedelta(minutes=10).total_seconds()
                            ),
                            rolling_period=144,
                        )
                    ],
                    period_start=datetime.utcnow() - timedelta(days=1),
                    period_end=datetime.utcnow(),
                    sub_batch_index=1,
                    sub_batch_count=1,
                ).save()
            )
    return batches
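# The fixtures above use a generate_random_key_data() helper that is not shown in this
# excerpt. A minimal sketch, assuming the key data is simply random bytes encoded as
# base64 (16 bytes by default, the size of a Temporary Exposure Key); the "length"
# parameter is an assumption added here for illustration only.
import base64
import os


def generate_random_key_data(length: int = 16) -> str:
    """Return base64-encoded random bytes, mimicking a Temporary Exposure Key."""
    return base64.b64encode(os.urandom(length)).decode("utf-8")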
def delete_old_data() -> None:
    """
    Periodically (default: every day, at midnight) delete data older than
    DATA_RETENTION_DAYS days (default: 14).
    Deleted data comprises (i) Upload and (ii) BatchFile models.
    """
    reference_date = datetime.combine(date.today(), datetime.min.time()) - timedelta(
        days=config.DATA_RETENTION_DAYS
    )

    # Make sure there are no unprocessed uploads in the data about to be deleted.
    if Upload.unprocessed_before(reference_date):
        _LOGGER.error(
            "Some Upload objects were unprocessed until deleted! This should never happen!"
        )

    Upload.delete_older_than(reference_date)
    BatchFile.delete_older_than(reference_date)
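# delete_older_than() and unprocessed_before() are model helpers that are not part of this
# excerpt. A minimal sketch of what delete_older_than() could look like on a mongoengine
# document, assuming a hypothetical "created_at" field; the real Upload and BatchFile
# models define their own fields and may filter on a different timestamp.
from datetime import datetime

from mongoengine import DateTimeField, Document


class TimestampedDocument(Document):
    """Illustrative abstract base; not the project's actual model hierarchy."""

    meta = {"abstract": True}
    created_at = DateTimeField(required=True, default=datetime.utcnow)

    @classmethod
    def delete_older_than(cls, reference_date: datetime) -> None:
        # Delete every document created strictly before the given reference date.
        cls.objects.filter(created_at__lt=reference_date).delete()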
def test_indexes(batch_files: List[BatchFile]) -> None:
    explain = get_db().command(
        "aggregate",
        BatchFile._meta["collection"],
        pipeline=BatchFile._get_oldest_and_newest_indexes_pipeline(2),
        explain=True,
    )
    assert (
        explain["stages"][0]["$cursor"]["queryPlanner"]["winningPlan"]["inputStage"]["stage"]
        == "IXSCAN"
    )
    # A separate sort stage should not be executed: the index scan already provides the order.
    assert len(explain["stages"]) == 3
def generate_random_batch(
    index: int, num_keys: int, period_start: datetime, period_end: datetime
) -> None:
    # Rolling start number: the current time expressed in 10-minute (600-second) intervals.
    rsn = int(datetime.utcnow().timestamp() / 600)
    keys = [
        TemporaryExposureKey(
            key_data=generate_random_key_data(),
            transmission_risk_level=random.choice(list(TransmissionRiskLevel)),
            rolling_start_number=rsn,
        )
        for _ in range(num_keys)
    ]
    BatchFile(
        index=index,
        keys=keys,
        period_start=period_start,
        period_end=period_end,
    ).save()
async def generate_various_data(num_days: int) -> None:
    starting_date = datetime.utcnow() - timedelta(days=num_days)
    for i in range(num_days):
        with freeze_time(starting_date + timedelta(days=i)):
            generate_random_uploads(
                1,
                start_time=datetime.utcnow(),
                end_time=datetime.utcnow() + timedelta(days=1, seconds=1),
            )
            BatchFile(
                index=i,
                keys=[
                    TemporaryExposureKey(key_data="dummy_data", rolling_start_number=12345)
                ],
                period_start=datetime.today() - timedelta(days=1, seconds=1),
                period_end=datetime.today(),
            ).save()
def test_oldest_newest_batches(batch_files: List[BatchFile]) -> None:
    assert BatchFile.get_oldest_and_newest_indexes(days=4) == {
        "oldest": 7,
        "newest": 10,
    }
def test_delete_old_batches(batch_files: List[BatchFile]) -> None:
    BatchFile.delete_older_than(datetime.utcnow() - timedelta(days=8, seconds=1))
    assert BatchFile.objects.count() == 9
def test_from_index(batch_files: List[BatchFile]) -> None:
    assert BatchFile.from_index(1) == batch_files[0]
def test_info_empty() -> None:
    assert BatchFile.get_latest_info() is None
async def _process_uploads() -> None:
    """
    This task is run every "PERIOD". The period is defined in config; it can be thought of
    as a period of 6 / 12 / 24 hours.
    The task creates a batch file from the unprocessed Uploads of the same period and
    performs some minor validations.
    If the number of keys in the given uploads is greater than the maximum number of keys
    allowed in a single batch, the task creates multiple batches and groups them into
    "sub-batches" instead.
    """
    _LOGGER.info("About to start processing uploads.")

    # Acquire a lock on redis before processing anything, avoiding concurrent tasks.
    async with lock_concurrency("process_uploads"):
        _LOGGER.info("Obtained lock.")

        infos = BatchFile.get_latest_info()
        now = datetime.utcnow()

        if infos:
            last_period, last_index = infos
        else:
            last_period = datetime.fromtimestamp(
                croniter(config.BATCH_PERIODICITY_CRONTAB).get_prev()
            )
            last_index = 0

        period_start = last_period
        period_end = now

        _LOGGER.info(
            "Starting to process uploads.",
            extra=dict(period_start=period_start, period_end=period_end),
        )

        uploads = Upload.to_process()
        _LOGGER.info("Uploads have been fetched.", extra=dict(n_uploads=uploads.count()))

        processed_uploads: List[ObjectId] = []
        keys: List[TemporaryExposureKey] = []
        for upload in uploads:
            if (reached := len(keys) + len(upload.keys)) > config.MAX_KEYS_PER_BATCH:
                _LOGGER.warning(
                    "Early stop: reached maximum number of keys per batch.",
                    extra=dict(
                        pre_reached=len(keys),
                        reached=reached,
                        max=config.MAX_KEYS_PER_BATCH,
                    ),
                )
                break
            keys += extract_keys_with_risk_level_from_upload(upload)
            processed_uploads.append(upload.id)

        if (n_keys := len(keys)) > 0:
            # Sort the keys. This randomizes their order (since they are random strings) so
            # that keys of the same device are no more likely to end up consecutively.
            keys = sorted(keys, key=lambda x: x.key_data)

            index = last_index + 1
            batch_file = BatchFile(
                index=index,
                keys=keys,
                period_start=period_start,
                period_end=period_end,
                sub_batch_index=1,
                sub_batch_count=1,
            )
            batch_file.client_content = batch_to_sdk_zip_file(batch_file)
            batch_file.save()

            _LOGGER.info("Created new batch.", extra=dict(index=index, n_keys=n_keys))
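# The docstring above mentions grouping keys into sub-batches when they exceed the maximum,
# while the body shown here stops early and always saves a single sub-batch. A minimal
# sketch of how such chunking could work; split_into_sub_batches() is hypothetical and not
# the project's actual implementation, and TemporaryExposureKey is assumed imported as in
# the task module above.
from typing import Iterator, List, Tuple


def split_into_sub_batches(
    keys: List[TemporaryExposureKey], max_keys_per_batch: int
) -> Iterator[Tuple[int, int, List[TemporaryExposureKey]]]:
    """Yield (sub_batch_index, sub_batch_count, chunk) triples, one per sub-batch."""
    chunks = [
        keys[i : i + max_keys_per_batch]
        for i in range(0, len(keys), max_keys_per_batch)
    ]
    sub_batch_count = len(chunks)
    for sub_batch_index, chunk in enumerate(chunks, start=1):
        yield sub_batch_index, sub_batch_count, chunk


# Each chunk could then be saved as its own BatchFile sharing the same index but with a
# distinct sub_batch_index and a common sub_batch_count.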