def test_create_wait_to_refresh_bq_tasks_state_ingest_locked(
    self, mock_task_manager
):
    # Arrange
    mock_table = Mock()
    mock_table.name = "test_table"
    self.mock_bq_refresh_config.for_schema_type.return_value.get_tables_to_export.return_value = [
        mock_table
    ]
    lock_manager = GCSPseudoLockManager()
    lock_manager.lock(GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME)

    # Act
    response = self.mock_flask_client.get(
        "/create_refresh_bq_tasks/state",
        headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
    )

    # Assert
    self.assertEqual(response.status_code, HTTPStatus.OK)
    self.assertFalse(
        lock_manager.no_active_locks_with_prefix(
            POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME
        )
    )
    self.assertTrue(
        mock_task_manager.return_value.job_monitor_cloud_task_queue_manager.create_task.called
    )
    mock_task_manager.return_value.create_refresh_bq_table_task.assert_not_called()
    mock_task_manager.return_value.create_bq_refresh_monitor_task.assert_not_called()
def test_using_lock_when_already_locked(self) -> None:
    lock_manager = GCSPseudoLockManager()
    lock_manager.lock(self.LOCK_NAME)

    with self.assertRaises(GCSPseudoLockAlreadyExists):
        with lock_manager.using_lock(self.LOCK_NAME, self.CONTENTS):
            pass
def test_create_refresh_bq_tasks_state(self, mock_task_manager):
    # Arrange
    mock_table = Mock()
    mock_table.name = "test_table"
    self.mock_bq_refresh_config.for_schema_type.return_value.get_tables_to_export.return_value = [
        mock_table
    ]
    lock_manager = GCSPseudoLockManager()

    # Act
    response = self.mock_flask_client.get(
        "/create_refresh_bq_tasks/state",
        headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
    )

    # Assert
    self.assertFalse(
        lock_manager.no_active_locks_with_prefix(
            POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME
        )
    )
    self.assertEqual(response.status_code, HTTPStatus.OK)
    self.mock_bq_refresh_config.for_schema_type.assert_called_with(SchemaType.STATE)
    mock_task_manager.return_value.create_refresh_bq_table_task.assert_called_with(
        "test_table", SchemaType.STATE
    )
    mock_task_manager.return_value.create_bq_refresh_monitor_task.assert_called_with(
        SchemaType.STATE.value, "v1.calculator.trigger_daily_pipelines", ANY
    )
def test_region_are_running(self) -> None:
    """Ensures the lock manager can see that a region's ingest is running."""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(
        GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME + self.REGION.upper()
    )
    self.assertFalse(
        lock_manager.no_active_locks_with_prefix(
            GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME
        )
    )
def test_lock_two_diff_unlock_one(self) -> None:
    """Locks two different locks, unlocks one, and asserts each is in the correct state."""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME)
    lock_manager.lock(self.LOCK_NAME2)
    lock_manager.unlock(self.LOCK_NAME)
    self.assertFalse(lock_manager.is_locked(self.LOCK_NAME))
    self.assertTrue(lock_manager.is_locked(self.LOCK_NAME2))
def test_contents_of_lock_set(self) -> None:
    """Locks with pre-specified contents and asserts the lockfile contains those contents"""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME, self.CONTENTS)
    path = GcsfsFilePath(
        bucket_name=lock_manager.bucket_name, blob_name=self.LOCK_NAME
    )
    actual_contents = self.fs.download_as_string(path)
    self.assertEqual(self.CONTENTS, actual_contents)
def test_locks_with_prefix_ignore_expired(self) -> None:
    """Ensures expired locks are ignored when checking for active locks with a prefix."""
    prefix = "SOME_LOCK_PREFIX"
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    self._upload_fake_expired_lock(lock_manager, prefix + "some_suffix")
    self.assertTrue(lock_manager.no_active_locks_with_prefix(prefix))
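# The _upload_fake_expired_lock helper used above is not defined in this
# excerpt. A minimal sketch of what it plausibly does, mirroring the direct
# upload in test_lock_expiration_not_met below but with a lock_time far enough
# in the past that the expiration window has elapsed. This body is an
# assumption, not the repository's actual implementation, and presumes
# datetime/timedelta are imported at module level.
def _upload_fake_expired_lock(
    self, lock_manager: GCSPseudoLockManager, lock_name: str
) -> None:
    # Write a lock body whose expiration has already passed, so the
    # manager should treat the lock as inactive.
    past = datetime.now() - timedelta(hours=1)
    path = GcsfsFilePath(bucket_name=lock_manager.bucket_name, blob_name=lock_name)
    self.fs.upload_from_string(
        path,
        json.dumps(
            GCSPseudoLockBody(lock_time=past, expiration_in_seconds=60).to_json(),
            default=str,
        ),
        content_type="text/text",
    )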
def test_unlock_locks_with_prefix(self) -> None:
    """Tests that all locks with prefix are unlocked"""
    lock_manager = GCSPseudoLockManager()
    lock_manager.lock(self.PREFIX + self.LOCK_NAME)
    lock_manager.lock(self.PREFIX + self.LOCK_NAME2)
    lock_manager.unlock_locks_with_prefix(self.PREFIX)
    self.assertFalse(lock_manager.is_locked(self.PREFIX + self.LOCK_NAME))
    self.assertFalse(lock_manager.is_locked(self.PREFIX + self.LOCK_NAME2))
def test_contents_of_lock_default(self) -> None:
    """Locks with default contents and asserts the lockfile contains correct time"""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME)
    correct_contents = datetime.now().strftime(self.TIME_FORMAT)
    path = GcsfsFilePath(
        bucket_name=lock_manager.bucket_name, blob_name=self.LOCK_NAME
    )
    actual_contents = self.fs.download_as_string(path)
    self.assertEqual(correct_contents, actual_contents)
def test_unlock_expired(self) -> None:
    lock_manager = GCSPseudoLockManager()
    self._upload_fake_expired_lock(lock_manager, self.LOCK_NAME)
    self.assertFalse(lock_manager.is_locked(self.LOCK_NAME))

    # Should not raise an error
    lock_manager.unlock(self.LOCK_NAME)
def test_contents_of_lock_default_is_valid_body(self) -> None:
    """Locks with default contents and asserts the lockfile parses as a GCSPseudoLockBody"""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME)
    path = GcsfsFilePath(
        bucket_name=lock_manager.bucket_name, blob_name=self.LOCK_NAME
    )
    actual_body = GCSPseudoLockBody.from_json_string(
        self.fs.download_as_string(path)
    )
    self.assertIsNotNone(actual_body)
def test_unlock_delete_fails_using_lock(self) -> None:
    self.gcs_factory_patcher.stop()
    self.gcs_factory_patcher.start().return_value = _FailingDeleteFs()

    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    with self.assertRaises(GCSPseudoLockFailedUnlock):
        with lock_manager.using_lock(self.LOCK_NAME):
            pass
def test_raise_from_using_lock(self) -> None:
    lock_manager = GCSPseudoLockManager()
    with self.assertRaises(ValueError):
        with lock_manager.using_lock(self.LOCK_NAME, self.CONTENTS):
            raise ValueError

    # lock should be unlocked outside of with
    self.assertFalse(lock_manager.is_locked(self.LOCK_NAME))
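# using_lock itself is not defined in this excerpt. Its observable contract in
# the tests above: acquiring raises GCSPseudoLockAlreadyExists when the lock is
# already held, a failed delete surfaces as GCSPseudoLockFailedUnlock, and the
# lock is released even when the body raises. A minimal sketch consistent with
# that contract (an assumption, not the repository's actual implementation):
from contextlib import contextmanager
from typing import Iterator, Optional

@contextmanager
def using_lock(self, name: str, payload: Optional[str] = None) -> Iterator[None]:
    # lock() raises GCSPseudoLockAlreadyExists if the lock is already held.
    self.lock(name, payload)
    try:
        yield
    finally:
        # unlock() runs even if the body raised; it may itself raise
        # GCSPseudoLockFailedUnlock if the GCS delete fails.
        self.unlock(name)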
def __init__(self, region_name: str, system_level: SystemLevel):
    """Initialize the controller.

    Args:
        region_name: (str) the name of the region to be collected.
        system_level: (SystemLevel) the system level this controller ingests data for.
    """
    self.region = regions.get_region(region_name, is_direct_ingest=True)
    self.system_level = system_level
    self.cloud_task_manager = DirectIngestCloudTaskManagerImpl()
    self.lock_manager = GCSPseudoLockManager()
def test_lock_two_diff(self) -> None:
    """Locks two different locks, asserts both locked"""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME)
    lock_manager.lock(self.LOCK_NAME2)
    self.assertTrue(lock_manager.is_locked(self.LOCK_NAME))
    self.assertTrue(lock_manager.is_locked(self.LOCK_NAME2))
def test_lock_one_unlock_other(self) -> None:
    """Locks one lock and unlocks another, asserts both have correct status"""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME)
    with self.assertRaises(GCSPseudoLockDoesNotExist):
        lock_manager.unlock(self.LOCK_NAME2)
    self.assertTrue(lock_manager.is_locked(self.LOCK_NAME))
    self.assertFalse(lock_manager.is_locked(self.LOCK_NAME2))
def __init__(
    self,
    region_code: str,
    ingest_instance: DirectIngestInstance,
    blocking_locks: List[str],
) -> None:
    """
    Args:
        region_code: The region code for the region to lock / unlock ingest for.
        ingest_instance: The ingest instance to lock / unlock ingest for.
        blocking_locks: Any locks that, if present, mean ingest into Postgres
            cannot proceed for this region.
    """
    self.region_code = region_code
    self.ingest_instance = ingest_instance
    self.blocking_locks = blocking_locks
    self.lock_manager = GCSPseudoLockManager()
def test_lock_expiration_not_met(self) -> None:
    now = datetime.now()

    lock_manager = GCSPseudoLockManager()
    path = GcsfsFilePath(
        bucket_name=lock_manager.bucket_name, blob_name=self.LOCK_NAME
    )
    self.fs.upload_from_string(
        path,
        json.dumps(
            GCSPseudoLockBody(lock_time=now, expiration_in_seconds=60).to_json(),
            default=str,
        ),
        content_type="text/text",
    )

    self.assertTrue(lock_manager.is_locked(self.LOCK_NAME))
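# Together with the expired-lock tests above, this pins down the expiration
# semantics: a lock stays active until lock_time + expiration_in_seconds has
# passed. A sketch of the check is_locked presumably performs on a parsed lock
# body (an assumption; the fields mirror the constructor arguments used in the
# test above):
from datetime import datetime, timedelta

def _lock_body_is_expired(body: GCSPseudoLockBody) -> bool:
    # A body with no expiration set never expires on its own.
    if body.expiration_in_seconds is None:
        return False
    return datetime.now() > body.lock_time + timedelta(
        seconds=body.expiration_in_seconds
    )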
def test_double_unlock(self) -> None:
    """Unlocks an already-unlocked lock twice, asserting each attempt raises and it stays unlocked"""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    self.assertFalse(lock_manager.is_locked(self.LOCK_NAME))
    with self.assertRaises(GCSPseudoLockDoesNotExist):
        lock_manager.unlock(self.LOCK_NAME)
    with self.assertRaises(GCSPseudoLockDoesNotExist):
        lock_manager.unlock(self.LOCK_NAME)
    self.assertFalse(lock_manager.is_locked(self.LOCK_NAME))
def test_double_lock_diff_contents(self) -> None:
    """Locks and then locks again with different contents, asserts it is still locked and an error is raised"""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME, payload=self.CONTENTS)
    with self.assertRaises(GCSPseudoLockAlreadyExists):
        lock_manager.lock(self.LOCK_NAME, self.CONTENTS2)
    self.assertTrue(lock_manager.is_locked(self.LOCK_NAME))
    self.assertEqual(self.CONTENTS, lock_manager.get_lock_payload(self.LOCK_NAME))
def test_double_lock(self) -> None:
    """Locks and then locks again, asserts it is still locked and an error is raised"""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME)
    with self.assertRaises(GCSPseudoLockAlreadyExists):
        lock_manager.lock(self.LOCK_NAME)
    self.assertTrue(lock_manager.is_locked(self.LOCK_NAME))
def test_monitor_refresh_bq_tasks_requeue_with_topic_and_message(
    self,
    mock_task_manager: mock.MagicMock,
    mock_pubsub_helper: mock.MagicMock,
    mock_supported_region_codes: mock.MagicMock,
) -> None:
    """Test that a new bq monitor task is added to the queue when there are
    still unfinished tasks on the bq queue, with topic/message to publish."""
    queue_path = "test-queue-path"

    lock_manager = GCSPseudoLockManager()
    mock_supported_region_codes.return_value = []

    schema = "schema"
    topic = "fake_topic"
    message = "fake_message"
    route = "/monitor_refresh_bq_tasks"
    data = {"schema": schema, "topic": topic, "message": message}

    lock_manager.lock(POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + schema.upper())

    mock_task_manager.return_value.get_bq_queue_info.return_value = CloudTaskQueueInfo(
        queue_name="queue_name",
        task_names=[
            f"{queue_path}/tasks/table_name-123-{schema}",
            f"{queue_path}/tasks/table_name-456-{schema}",
            f"{queue_path}/tasks/table_name-789-{schema}",
        ],
    )

    response = self.mock_flask_client.post(
        route,
        data=json.dumps(data),
        content_type="application/json",
        headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
    )

    self.assertEqual(response.status_code, HTTPStatus.OK)
    mock_task_manager.return_value.create_bq_refresh_monitor_task.assert_called_with(
        schema, topic, message
    )
    mock_pubsub_helper.publish_message_to_topic.assert_not_called()
    self.assertTrue(
        lock_manager.is_locked(
            POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + schema.upper()
        )
    )
def test_locks_with_prefix_do_not_exist(self) -> None:
    """Ensures the lock manager reports no active locks with a prefix once they are unlocked."""
    prefix = "SOME_LOCK_PREFIX"
    lock_name = prefix + "some_suffix"
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(lock_name)
    lock_manager.unlock(lock_name)
    self.assertTrue(lock_manager.no_active_locks_with_prefix(prefix))
def test_double_lock_diff_contents_json(self) -> None:
    """Locks and then locks again with different JSON contents, asserts it is still locked and an error is raised"""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME)
    time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    lock_id = str(uuid.uuid4())
    contents_as_json = {"time": time, "uuid": lock_id}
    contents = json.dumps(contents_as_json)

    with self.assertRaises(GCSPseudoLockAlreadyExists):
        lock_manager.lock(self.LOCK_NAME, contents)
    self.assertTrue(lock_manager.is_locked(self.LOCK_NAME))
    self.assertEqual(time, lock_manager.get_lock_contents(self.LOCK_NAME))
def monitor_refresh_bq_tasks() -> Tuple[str, int]:
    """Worker function to publish a message to a Pub/Sub topic once all tasks in
    the BIGQUERY_QUEUE queue have completed.
    """
    json_data = request.get_data(as_text=True)
    data = json.loads(json_data)
    schema = data["schema"]
    topic = data["topic"]
    message = data["message"]

    task_manager = BQRefreshCloudTaskManager()

    # If any task in the queue has a task name containing the schema, consider
    # BQ tasks still in the queue.
    bq_tasks_in_queue = False
    bq_task_list = task_manager.get_bq_queue_info().task_names
    for task_name in bq_task_list:
        task_id = task_name[task_name.find("/tasks/"):]
        if schema in task_id:
            bq_tasks_in_queue = True

    # If there are BQ tasks in the queue, then re-queue this task in a minute
    if bq_tasks_in_queue:
        logging.info("Tasks still in bigquery queue. Re-queuing bq monitor task.")
        task_manager.create_bq_refresh_monitor_task(schema, topic, message)
        return "", HTTPStatus.OK

    # Publish a message to the Pub/Sub topic once state BQ export is complete
    if topic:
        pubsub_helper.publish_message_to_topic(message=message, topic=topic)

    # Unlock export lock when all BQ exports complete
    lock_manager = GCSPseudoLockManager()
    lock_manager.unlock(postgres_to_bq_lock_name_with_suffix(schema))
    logging.info(
        "Done running export for %s, unlocking Postgres to BigQuery export", schema
    )

    # Kick scheduler to restart ingest
    kick_all_schedulers()

    return "", HTTPStatus.OK
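# The handler above expects a JSON body with schema, topic, and message, plus
# the App Engine app-id header the tests below supply. A minimal sketch of an
# equivalent direct request (the host is a placeholder and the payload values
# are illustrative; the tests below use Flask's test client instead):
import json
import requests

payload = {"schema": "state", "topic": "fake_topic", "message": "fake_message"}
requests.post(
    "https://<app-host>/monitor_refresh_bq_tasks",  # placeholder host
    data=json.dumps(payload),
    headers={
        "Content-Type": "application/json",
        "X-Appengine-Inbound-Appid": "recidiviz-123",
    },
)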
def test_contents_of_unlocked_and_relocked(self) -> None:
    """Locks, unlocks, and re-locks with new contents, asserting the lockfile contains the new payload"""
    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME, self.CONTENTS)
    lock_manager.unlock(self.LOCK_NAME)
    lock_manager.lock(self.LOCK_NAME, self.CONTENTS2)
    path = GcsfsFilePath(
        bucket_name=lock_manager.bucket_name, blob_name=self.LOCK_NAME
    )
    actual_body = GCSPseudoLockBody.from_json_string(
        self.fs.download_as_string(path)
    )
    assert actual_body is not None
    self.assertEqual(self.CONTENTS2, actual_body.payload)
def wait_for_ingest_to_create_tasks(schema_arg: str) -> Tuple[str, HTTPStatus]:
    """Worker function to wait until ingest is not running to
    create_all_bq_refresh_tasks_for_schema. When ingest is not running/locked,
    creates a task to create_all_bq_refresh_tasks_for_schema. When ingest is
    running/locked, re-enqueues this task to run again in 60 seconds.
    """
    task_manager = BQRefreshCloudTaskManager()
    lock_manager = GCSPseudoLockManager()
    json_data_text = request.get_data(as_text=True)
    try:
        json_data = json.loads(json_data_text)
    except (TypeError, json.decoder.JSONDecodeError):
        json_data = {}

    if "lock_id" not in json_data:
        lock_id = str(uuid.uuid4())
    else:
        lock_id = json_data["lock_id"]
    logging.info("Request lock id: %s", lock_id)

    if not lock_manager.is_locked(postgres_to_bq_lock_name_with_suffix(schema_arg)):
        time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        contents_as_json = {"time": time, "lock_id": lock_id}
        contents = json.dumps(contents_as_json)
        lock_manager.lock(
            postgres_to_bq_lock_name_with_suffix(schema_arg), contents
        )
    else:
        contents = lock_manager.get_lock_contents(
            postgres_to_bq_lock_name_with_suffix(schema_arg)
        )
        try:
            contents_json = json.loads(contents)
        except (TypeError, json.decoder.JSONDecodeError):
            contents_json = {}
        logging.info("Lock contents: %s", contents_json)
        if lock_id != contents_json.get("lock_id"):
            raise GCSPseudoLockAlreadyExists(
                f"UUID {lock_id} does not match existing lock's UUID"
            )

    no_regions_running = lock_manager.no_active_locks_with_prefix(
        GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME
    )
    if not no_regions_running:
        logging.info("Regions running, re-enqueuing this task.")
        task_id = "{}-{}-{}".format(
            "renqueue_wait_task", str(datetime.utcnow().date()), uuid.uuid4()
        )
        body = {"schema_type": schema_arg, "lock_id": lock_id}
        task_manager.job_monitor_cloud_task_queue_manager.create_task(
            task_id=task_id,
            body=body,
            relative_uri=f"/cloud_sql_to_bq/create_refresh_bq_tasks/{schema_arg}",
            schedule_delay_seconds=60,
        )
        return "", HTTPStatus.OK

    logging.info("No regions running, calling create_refresh_bq_tasks")
    create_all_bq_refresh_tasks_for_schema(schema_arg)
    return "", HTTPStatus.OK
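# postgres_to_bq_lock_name_with_suffix is used above but not defined in this
# excerpt. The tests in this section build the same lock names manually as
# POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + schema.upper(), so a sketch
# consistent with that usage (an assumption, not the actual implementation):
def postgres_to_bq_lock_name_with_suffix(schema: str) -> str:
    # Mirrors the lock names the tests construct: prefix + upper-cased schema.
    return POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + schema.upper()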
def test_create_wait_to_refresh_bq_tasks_state_export_locked(
    self, mock_task_manager
):
    # Arrange
    mock_table = Mock()
    mock_table.name = "test_table"
    self.mock_bq_refresh_config.for_schema_type.return_value.get_tables_to_export.return_value = [
        mock_table
    ]
    lock_manager = GCSPseudoLockManager()
    lock_manager.lock(POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + "STATE")

    # Act
    with self.assertRaises(GCSPseudoLockAlreadyExists):
        self.mock_flask_client.get(
            "/create_refresh_bq_tasks/state",
            headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
        )

    # Assert
    mock_task_manager.return_value.create_refresh_bq_table_task.assert_not_called()
    mock_task_manager.return_value.create_bq_refresh_monitor_task.assert_not_called()
def test_lock_unlock_with_retry(self) -> None:
    """Locks then unlocks with a filesystem whose first delete attempts fail,
    asserting the unlock retries until the lock is released."""
    self.gcs_factory_patcher.stop()
    self.gcs_factory_patcher.start().return_value = _MultipleAttemptDeleteFs()

    lock_manager = GCSPseudoLockManager(self.PROJECT_ID)
    lock_manager.lock(self.LOCK_NAME)
    lock_manager.unlock(self.LOCK_NAME)
    self.assertFalse(lock_manager.is_locked(self.LOCK_NAME))
def test_monitor_refresh_bq_tasks_requeue_unlock_no_publish(
    self,
    mock_task_manager: mock.MagicMock,
    mock_pubsub_helper: mock.MagicMock,
    mock_supported_region_codes: mock.MagicMock,
) -> None:
    """Test that a bq monitor task does not publish topic/message with empty
    topic/message and that it unlocks export lock"""
    lock_manager = GCSPseudoLockManager()
    mock_supported_region_codes.return_value = []

    schema = "schema"
    topic = ""
    message = ""
    route = "/monitor_refresh_bq_tasks"
    data = {"schema": schema, "topic": topic, "message": message}

    lock_manager.lock(POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + schema.upper())

    mock_task_manager.return_value.get_bq_queue_info.return_value = CloudTaskQueueInfo(
        queue_name="queue_name", task_names=[]
    )

    response = self.mock_flask_client.post(
        route,
        data=json.dumps(data),
        content_type="application/json",
        headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
    )

    self.assertEqual(response.status_code, HTTPStatus.OK)
    mock_task_manager.return_value.create_bq_refresh_monitor_task.assert_not_called()
    mock_pubsub_helper.publish_message_to_topic.assert_not_called()
    self.assertFalse(
        lock_manager.is_locked(
            POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + schema.upper()
        )
    )