Example No. 1 (score: 0)
    def test_create_wait_to_refresh_bq_tasks_state_ingest_locked(
            self, mock_task_manager):
        """When the ingest lock is held, the endpoint returns OK but only
        schedules the wait/monitor task — no refresh tasks are created."""
        # Arrange: one exportable table and an active ingest lock.
        fake_table = Mock()
        fake_table.name = "test_table"
        config = self.mock_bq_refresh_config.for_schema_type.return_value
        config.get_tables_to_export.return_value = [fake_table]
        manager = GCSPseudoLockManager()
        manager.lock(GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME)

        # Act
        response = self.mock_flask_client.get(
            "/create_refresh_bq_tasks/state",
            headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
        )

        # Assert: request OK, export lock was taken, and only the wait task
        # was scheduled.
        self.assertEqual(HTTPStatus.OK, response.status_code)
        self.assertFalse(
            manager.no_active_locks_with_prefix(
                POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME))
        queue_manager = (mock_task_manager.return_value.
                         job_monitor_cloud_task_queue_manager)
        self.assertTrue(queue_manager.create_task.called)
        mock_task_manager.return_value.create_refresh_bq_table_task.assert_not_called()
        mock_task_manager.return_value.create_bq_refresh_monitor_task.assert_not_called()
    def test_using_lock_when_already_locked(self) -> None:
        """Entering using_lock on an already-held lock raises
        GCSPseudoLockAlreadyExists."""
        manager = GCSPseudoLockManager()
        manager.lock(self.LOCK_NAME)

        with self.assertRaises(GCSPseudoLockAlreadyExists), \
                manager.using_lock(self.LOCK_NAME, self.CONTENTS):
            pass
Example No. 3 (score: 0)
    def test_create_refresh_bq_tasks_state(self, mock_task_manager):
        """The endpoint grabs the export lock and creates one refresh task per
        table plus the monitor task for the STATE schema."""
        # Arrange: a single exportable table.
        fake_table = Mock()
        fake_table.name = "test_table"
        config = self.mock_bq_refresh_config.for_schema_type.return_value
        config.get_tables_to_export.return_value = [fake_table]
        manager = GCSPseudoLockManager()

        # Act
        response = self.mock_flask_client.get(
            "/create_refresh_bq_tasks/state",
            headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
        )

        # Assert: export lock held and the expected tasks were created.
        self.assertFalse(
            manager.no_active_locks_with_prefix(
                POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME))
        self.assertEqual(HTTPStatus.OK, response.status_code)
        self.mock_bq_refresh_config.for_schema_type.assert_called_with(
            SchemaType.STATE)
        mock_task_manager.return_value.create_refresh_bq_table_task.assert_called_with(
            "test_table", SchemaType.STATE)
        mock_task_manager.return_value.create_bq_refresh_monitor_task.assert_called_with(
            SchemaType.STATE.value, "v1.calculator.trigger_daily_pipelines",
            ANY)
 def test_region_are_running(self) -> None:
     """A region-specific ingest lock counts as active under the ingest
     lock prefix."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     region_lock = (GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME +
                    self.REGION.upper())
     manager.lock(region_lock)
     self.assertFalse(
         manager.no_active_locks_with_prefix(
             GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME))
 def test_lock_two_diff_unlock_one(self) -> None:
     """After taking two distinct locks and releasing the first, only the
     second remains held."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     for name in (self.LOCK_NAME, self.LOCK_NAME2):
         manager.lock(name)
     manager.unlock(self.LOCK_NAME)
     self.assertFalse(manager.is_locked(self.LOCK_NAME))
     self.assertTrue(manager.is_locked(self.LOCK_NAME2))
 def test_contents_of_lock_set(self) -> None:
     """A lock created with explicit contents stores exactly those bytes."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     manager.lock(self.LOCK_NAME, self.CONTENTS)
     lock_path = GcsfsFilePath(bucket_name=manager.bucket_name,
                               blob_name=self.LOCK_NAME)
     self.assertEqual(self.CONTENTS, self.fs.download_as_string(lock_path))
    def test_locks_with_prefix_ignore_expired(self) -> None:
        """An expired lock under a prefix is not treated as active."""
        prefix = "SOME_LOCK_PREFIX"
        manager = GCSPseudoLockManager(self.PROJECT_ID)
        self._upload_fake_expired_lock(manager, prefix + "some_suffix")
        self.assertTrue(manager.no_active_locks_with_prefix(prefix))
 def test_unlock_locks_with_prefix(self) -> None:
     """unlock_locks_with_prefix releases every lock sharing the prefix."""
     manager = GCSPseudoLockManager()
     names = (self.PREFIX + self.LOCK_NAME, self.PREFIX + self.LOCK_NAME2)
     for name in names:
         manager.lock(name)
     manager.unlock_locks_with_prefix(self.PREFIX)
     for name in names:
         self.assertFalse(manager.is_locked(name))
 def test_contents_of_lock_default(self) -> None:
     """A lock made without explicit contents records the current time."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     manager.lock(self.LOCK_NAME)
     expected = datetime.now().strftime(self.TIME_FORMAT)
     lock_path = GcsfsFilePath(bucket_name=manager.bucket_name,
                               blob_name=self.LOCK_NAME)
     self.assertEqual(expected, self.fs.download_as_string(lock_path))
    def test_unlock_expired(self) -> None:
        """An expired lock reads as unlocked, and unlocking it is a no-op
        rather than an error."""
        manager = GCSPseudoLockManager()
        self._upload_fake_expired_lock(manager, self.LOCK_NAME)

        self.assertFalse(manager.is_locked(self.LOCK_NAME))

        # Unlocking the already-expired lock must not raise.
        manager.unlock(self.LOCK_NAME)
 def test_contents_of_lock_default(self) -> None:
     """A default lock body round-trips through JSON parsing."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     manager.lock(self.LOCK_NAME)
     lock_path = GcsfsFilePath(bucket_name=manager.bucket_name,
                               blob_name=self.LOCK_NAME)
     raw = self.fs.download_as_string(lock_path)
     self.assertIsNotNone(GCSPseudoLockBody.from_json_string(raw))
    def test_unlock_delete_fails_using_lock(self) -> None:
        """If deleting the lock file fails, using_lock raises
        GCSPseudoLockFailedUnlock on exit."""
        # Swap in a filesystem whose deletes always fail.
        self.gcs_factory_patcher.stop()
        self.gcs_factory_patcher.start().return_value = _FailingDeleteFs()

        manager = GCSPseudoLockManager(self.PROJECT_ID)

        with self.assertRaises(GCSPseudoLockFailedUnlock):
            with manager.using_lock(self.LOCK_NAME):
                pass
    def test_raise_from_using_lock(self) -> None:
        """An exception raised inside using_lock propagates, and the lock is
        still released on the way out."""
        manager = GCSPseudoLockManager()

        with self.assertRaises(ValueError):
            with manager.using_lock(self.LOCK_NAME, self.CONTENTS):
                raise ValueError

        # The context manager must have unlocked despite the exception.
        self.assertFalse(manager.is_locked(self.LOCK_NAME))
    def __init__(self, region_name: str, system_level: SystemLevel):
        """Initialize the controller.

        Args:
            region_name: (str) the name of the region to be collected.
            system_level: (SystemLevel) the system level this controller
                operates at; stored on the instance as-is.
        """

        # Resolve region metadata; this is a direct-ingest region by construction.
        self.region = regions.get_region(region_name, is_direct_ingest=True)
        self.system_level = system_level
        self.cloud_task_manager = DirectIngestCloudTaskManagerImpl()
        self.lock_manager = GCSPseudoLockManager()
 def test_lock_two_diff(self) -> None:
     """Two distinct locks can be held at the same time."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     for name in (self.LOCK_NAME, self.LOCK_NAME2):
         manager.lock(name)
     for name in (self.LOCK_NAME, self.LOCK_NAME2):
         self.assertTrue(manager.is_locked(name))
 def test_lock_one_unlock_other(self) -> None:
     """Unlocking a never-locked name raises and leaves the held lock
     untouched."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     manager.lock(self.LOCK_NAME)
     with self.assertRaises(GCSPseudoLockDoesNotExist):
         manager.unlock(self.LOCK_NAME2)
     self.assertTrue(manager.is_locked(self.LOCK_NAME))
     self.assertFalse(manager.is_locked(self.LOCK_NAME2))
Example No. 17 (score: 0)
 def __init__(
     self,
     region_code: str,
     ingest_instance: DirectIngestInstance,
     blocking_locks: List[str],
 ) -> None:
     """
     Args:
         region_code: The region code for the region to lock / unlock ingest for.
         ingest_instance: The ingest instance to lock / unlock ingest for;
             stored on the instance as-is.
         blocking_locks: Any locks that, if present, mean ingest into Postgres
             cannot proceed for this region.
     """
     self.region_code = region_code
     self.ingest_instance = ingest_instance
     self.blocking_locks = blocking_locks
     self.lock_manager = GCSPseudoLockManager()
    def test_lock_expiration_not_met(self) -> None:
        """A lock whose expiration window has not yet elapsed is still held."""
        lock_time = datetime.now()
        manager = GCSPseudoLockManager()

        lock_path = GcsfsFilePath(bucket_name=manager.bucket_name,
                                  blob_name=self.LOCK_NAME)
        # Upload a lock body taken "now" with a 60-second expiration.
        body = GCSPseudoLockBody(lock_time=lock_time,
                                 expiration_in_seconds=60)
        self.fs.upload_from_string(
            lock_path,
            json.dumps(body.to_json(), default=str),
            content_type="text/text",
        )
        self.assertTrue(manager.is_locked(self.LOCK_NAME))
 def test_double_unlock(self) -> None:
     """Unlocking a lock that was never held raises every time, and the
     lock stays unlocked throughout."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     self.assertFalse(manager.is_locked(self.LOCK_NAME))
     for _ in range(2):
         with self.assertRaises(GCSPseudoLockDoesNotExist):
             manager.unlock(self.LOCK_NAME)
     self.assertFalse(manager.is_locked(self.LOCK_NAME))
    def test_double_lock_diff_contents(self) -> None:
        """Re-locking a held lock with new contents raises and leaves the
        original payload in place."""
        manager = GCSPseudoLockManager(self.PROJECT_ID)
        manager.lock(self.LOCK_NAME, payload=self.CONTENTS)

        with self.assertRaises(GCSPseudoLockAlreadyExists):
            manager.lock(self.LOCK_NAME, self.CONTENTS2)

        self.assertTrue(manager.is_locked(self.LOCK_NAME))
        self.assertEqual(self.CONTENTS,
                         manager.get_lock_payload(self.LOCK_NAME))
 def test_double_lock(self) -> None:
     """Locking an already-held lock raises and the lock stays held."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     manager.lock(self.LOCK_NAME)
     with self.assertRaises(GCSPseudoLockAlreadyExists):
         manager.lock(self.LOCK_NAME)
     self.assertTrue(manager.is_locked(self.LOCK_NAME))
Example No. 22 (score: 0)
    def test_monitor_refresh_bq_tasks_requeue_with_topic_and_message(
        self,
        mock_task_manager: mock.MagicMock,
        mock_pubsub_helper: mock.MagicMock,
        mock_supported_region_codes: mock.MagicMock,
    ) -> None:
        """While schema-matching tasks remain on the BQ queue, the monitor
        re-enqueues itself (with the topic/message) instead of publishing,
        and the export lock stays held."""
        mock_supported_region_codes.return_value = []
        manager = GCSPseudoLockManager()
        queue_path = "test-queue-path"

        schema = "schema"
        topic = "fake_topic"
        message = "fake_message"
        route = "/monitor_refresh_bq_tasks"
        request_body = {"schema": schema, "topic": topic, "message": message}

        export_lock = (POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME +
                       schema.upper())
        manager.lock(export_lock)

        # Three pending table-refresh tasks for this schema.
        mock_task_manager.return_value.get_bq_queue_info.return_value = (
            CloudTaskQueueInfo(
                queue_name="queue_name",
                task_names=[
                    f"{queue_path}/tasks/table_name-{suffix}-{schema}"
                    for suffix in (123, 456, 789)
                ],
            ))

        response = self.mock_flask_client.post(
            route,
            data=json.dumps(request_body),
            content_type="application/json",
            headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
        )

        self.assertEqual(HTTPStatus.OK, response.status_code)
        mock_task_manager.return_value.create_bq_refresh_monitor_task.assert_called_with(
            schema, topic, message)
        mock_pubsub_helper.publish_message_to_topic.assert_not_called()
        # Lock must remain held while tasks are still pending.
        self.assertTrue(manager.is_locked(export_lock))
 def test_locks_with_prefix_do_not_exist(self) -> None:
     """A lock that was taken and then released no longer counts as active
     for its prefix."""
     prefix = "SOME_LOCK_PREFIX"
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     name = prefix + "some_suffix"
     manager.lock(name)
     manager.unlock(name)
     self.assertTrue(manager.no_active_locks_with_prefix(prefix))
 def test_double_lock_diff_contents(self) -> None:
     """Re-locking a held lock with fresh JSON contents raises, and the
     original (default, time-stamped) contents survive."""
     manager = GCSPseudoLockManager(self.PROJECT_ID)
     manager.lock(self.LOCK_NAME)
     time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
     new_contents = json.dumps({"time": time, "uuid": str(uuid.uuid4())})
     with self.assertRaises(GCSPseudoLockAlreadyExists):
         manager.lock(self.LOCK_NAME, new_contents)
     self.assertTrue(manager.is_locked(self.LOCK_NAME))
     self.assertEqual(time, manager.get_lock_contents(self.LOCK_NAME))
def monitor_refresh_bq_tasks() -> Tuple[str, int]:
    """Worker function to publish a message to a Pub/Sub topic once all tasks in
    the BIGQUERY_QUEUE queue have completed.
    """
    data = json.loads(request.get_data(as_text=True))
    schema = data["schema"]
    topic = data["topic"]
    message = data["message"]

    task_manager = BQRefreshCloudTaskManager()

    # Any queued task whose id (the "/tasks/..." tail) mentions the schema
    # means BQ refresh work is still pending.
    task_names = task_manager.get_bq_queue_info().task_names
    bq_tasks_in_queue = any(
        schema in task_name[task_name.find("/tasks/"):]
        for task_name in task_names)

    # Still busy: check again in a minute by re-enqueueing this monitor task.
    if bq_tasks_in_queue:
        logging.info("Tasks still in bigquery queue. Re-queuing bq monitor"
                     " task.")
        task_manager.create_bq_refresh_monitor_task(schema, topic, message)
        return "", HTTPStatus.OK

    # All exports finished: notify subscribers if a topic was provided.
    if topic:
        pubsub_helper.publish_message_to_topic(message=message, topic=topic)

    # Release the export lock now that the refresh is complete.
    lock_manager = GCSPseudoLockManager()
    lock_manager.unlock(postgres_to_bq_lock_name_with_suffix(schema))
    logging.info(
        "Done running export for %s, unlocking Postgres to BigQuery export",
        schema)

    # Kick scheduler to restart ingest
    kick_all_schedulers()

    return "", HTTPStatus.OK
    def test_contents_of_unlocked_and_relocked(self) -> None:
        """Re-locking after an unlock stores the new payload, not the old one."""
        manager = GCSPseudoLockManager(self.PROJECT_ID)
        manager.lock(self.LOCK_NAME, self.CONTENTS)
        manager.unlock(self.LOCK_NAME)
        manager.lock(self.LOCK_NAME, self.CONTENTS2)

        lock_path = GcsfsFilePath(bucket_name=manager.bucket_name,
                                  blob_name=self.LOCK_NAME)
        body = GCSPseudoLockBody.from_json_string(
            self.fs.download_as_string(lock_path))

        assert body is not None
        self.assertEqual(self.CONTENTS2, body.payload)
def wait_for_ingest_to_create_tasks(schema_arg: str) -> Tuple[str, HTTPStatus]:
    """Worker function to wait until ingest is not running to create_all_bq_refresh_tasks_for_schema.
    When ingest is not running/locked, creates task to create_all_bq_refresh_tasks_for_schema.
    When ingest is running/locked, re-enqueues this task to run again in 60 seconds.

    Args:
        schema_arg: Schema suffix used to build the Postgres-to-BQ export
            lock name and the re-enqueue URI.

    Raises:
        GCSPseudoLockAlreadyExists: if the export lock is already held by a
            request chain with a different lock id.
    """
    task_manager = BQRefreshCloudTaskManager()
    lock_manager = GCSPseudoLockManager()

    # The request body may be absent or malformed; treat that as "no lock id"
    # and mint a fresh one so re-enqueued runs can recognize their own lock.
    json_data_text = request.get_data(as_text=True)
    try:
        json_data = json.loads(json_data_text)
    except (TypeError, json.decoder.JSONDecodeError):
        json_data = {}
    if "lock_id" not in json_data:
        lock_id = str(uuid.uuid4())
    else:
        lock_id = json_data["lock_id"]
    logging.info("Request lock id: %s", lock_id)

    # Hoisted: the export lock name is needed in several places below.
    export_lock_name = postgres_to_bq_lock_name_with_suffix(schema_arg)
    if not lock_manager.is_locked(export_lock_name):
        # First pass: take the export lock, embedding our lock id.
        time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        contents = json.dumps({"time": time, "lock_id": lock_id})
        lock_manager.lock(export_lock_name, contents)
    else:
        # Lock already held: verify it belongs to this request chain.
        contents = lock_manager.get_lock_contents(export_lock_name)
        try:
            contents_json = json.loads(contents)
        except (TypeError, json.decoder.JSONDecodeError):
            contents_json = {}
        logging.info("Lock contents: %s", contents_json)
        if lock_id != contents_json.get("lock_id"):
            raise GCSPseudoLockAlreadyExists(
                f"UUID {lock_id} does not match existing lock's UUID")

    no_regions_running = lock_manager.no_active_locks_with_prefix(
        GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME)
    if not no_regions_running:
        logging.info("Regions running, re-enqueuing this task.")
        # f-string (consistent with the rest of the file) instead of
        # str.format; the task id stays unique per date + uuid.
        task_id = (f"renqueue_wait_task-{datetime.utcnow().date()}-"
                   f"{uuid.uuid4()}")
        body = {"schema_type": schema_arg, "lock_id": lock_id}
        task_manager.job_monitor_cloud_task_queue_manager.create_task(
            task_id=task_id,
            body=body,
            relative_uri=
            f"/cloud_sql_to_bq/create_refresh_bq_tasks/{schema_arg}",
            schedule_delay_seconds=60,
        )
        return "", HTTPStatus.OK
    logging.info("No regions running, calling create_refresh_bq_tasks")
    create_all_bq_refresh_tasks_for_schema(schema_arg)
    return "", HTTPStatus.OK
Example No. 28 (score: 0)
    def test_create_wait_to_refresh_bq_tasks_state_export_locked(
            self, mock_task_manager):
        """If the STATE export lock is already held, the endpoint raises and
        neither refresh nor monitor tasks are created."""
        # Arrange: one exportable table and an already-held export lock.
        fake_table = Mock()
        fake_table.name = "test_table"
        config = self.mock_bq_refresh_config.for_schema_type.return_value
        config.get_tables_to_export.return_value = [fake_table]
        manager = GCSPseudoLockManager()
        manager.lock(POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME + "STATE")

        # Act / Assert: the request fails on the existing lock.
        with self.assertRaises(GCSPseudoLockAlreadyExists):
            self.mock_flask_client.get(
                "/create_refresh_bq_tasks/state",
                headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
            )

        mock_task_manager.return_value.create_refresh_bq_table_task.assert_not_called()
        mock_task_manager.return_value.create_bq_refresh_monitor_task.assert_not_called()
    def test_lock_unlock_with_retry(self) -> None:
        """Unlock succeeds even when the filesystem requires multiple delete
        attempts."""
        # Swap in a filesystem whose delete only succeeds after retries.
        self.gcs_factory_patcher.stop()
        patched_factory = self.gcs_factory_patcher.start()
        patched_factory.return_value = _MultipleAttemptDeleteFs()

        manager = GCSPseudoLockManager(self.PROJECT_ID)
        manager.lock(self.LOCK_NAME)
        manager.unlock(self.LOCK_NAME)
        self.assertFalse(manager.is_locked(self.LOCK_NAME))
Example No. 30 (score: 0)
    def test_monitor_refresh_bq_tasks_requeue_unlock_no_publish(
        self,
        mock_task_manager: mock.MagicMock,
        mock_pubsub_helper: mock.MagicMock,
        mock_supported_region_codes: mock.MagicMock,
    ) -> None:
        """With an empty queue and empty topic/message, the monitor neither
        re-enqueues nor publishes, and it releases the export lock."""
        mock_supported_region_codes.return_value = []
        manager = GCSPseudoLockManager()

        schema = "schema"
        topic = ""
        message = ""
        route = "/monitor_refresh_bq_tasks"
        request_body = {"schema": schema, "topic": topic, "message": message}

        export_lock = (POSTGRES_TO_BQ_EXPORT_RUNNING_LOCK_NAME +
                       schema.upper())
        manager.lock(export_lock)

        # Empty queue: the monitor should finish rather than re-enqueue.
        mock_task_manager.return_value.get_bq_queue_info.return_value = (
            CloudTaskQueueInfo(queue_name="queue_name", task_names=[]))

        response = self.mock_flask_client.post(
            route,
            data=json.dumps(request_body),
            content_type="application/json",
            headers={"X-Appengine-Inbound-Appid": "recidiviz-123"},
        )

        self.assertEqual(HTTPStatus.OK, response.status_code)
        mock_task_manager.return_value.create_bq_refresh_monitor_task.assert_not_called()
        mock_pubsub_helper.publish_message_to_topic.assert_not_called()
        self.assertFalse(manager.is_locked(export_lock))