Exemple #1
0
def create_all_jails_bq_refresh_tasks() -> Tuple[str, int]:
    """Enqueue one BigQuery refresh task per exportable jails table.

    The list of tables comes from the CloudSqlToBQConfig built for
    SchemaType.JAILS; a refresh task is created for each of them through a
    BQRefreshCloudTaskManager.

    Returns:
        An empty response body paired with HTTPStatus.OK.
    """
    logging.info("Beginning BQ export for jails schema tables.")

    jails_schema = SchemaType.JAILS
    manager = BQRefreshCloudTaskManager()
    export_config = CloudSqlToBQConfig.for_schema_type(jails_schema)

    for exported_table in export_config.get_tables_to_export():
        manager.create_refresh_bq_table_task(exported_table.name, jails_schema)

    return '', HTTPStatus.OK
def wait_for_ingest_to_create_tasks(schema_arg: str) -> Tuple[str, HTTPStatus]:
    """Worker function that waits for ingest to stop before creating BQ refresh tasks.

    The request body may carry a JSON object with a "lock_id"; when it is
    missing (or the body is not a JSON object) a fresh UUID is generated.

    * If the Postgres-to-BQ lock for this schema is not held, it is taken and
      its contents record the current time and this request's lock id.
    * If the lock is already held, its recorded lock id must match this
      request's lock id.
    * While any ingest-running lock is active, this task is re-enqueued to run
      again in 60 seconds, carrying the same lock id.
    * Otherwise create_all_bq_refresh_tasks_for_schema is called.

    Args:
        schema_arg: Name of the schema to refresh, as received in the URL.

    Returns:
        An empty response body paired with HTTPStatus.OK.

    Raises:
        GCSPseudoLockAlreadyExists: If the lock is held and its recorded lock
            id differs from this request's lock id.
    """
    task_manager = BQRefreshCloudTaskManager()
    lock_manager = GCSPseudoLockManager()
    json_data_text = request.get_data(as_text=True)
    try:
        json_data = json.loads(json_data_text)
    except (TypeError, json.decoder.JSONDecodeError):
        json_data = {}
    # json.loads can legitimately return a list, number, string or None; only
    # a JSON object can carry a lock id, so treat anything else as "no body".
    if not isinstance(json_data, dict):
        json_data = {}
    if "lock_id" in json_data:
        lock_id = json_data["lock_id"]
    else:
        lock_id = str(uuid.uuid4())
    logging.info("Request lock id: %s", lock_id)

    lock_name = postgres_to_bq_lock_name_with_suffix(schema_arg)
    if not lock_manager.is_locked(lock_name):
        locked_at = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        contents = json.dumps({"time": locked_at, "lock_id": lock_id})
        lock_manager.lock(lock_name, contents)
    else:
        contents = lock_manager.get_lock_contents(lock_name)
        try:
            contents_json = json.loads(contents)
        except (TypeError, json.decoder.JSONDecodeError):
            contents_json = {}
        logging.info("Lock contents: %s", contents_json)
        # Lock contents that are not a JSON object cannot name an owner, so
        # they are handled like unparseable contents: no lock id will match.
        if not isinstance(contents_json, dict):
            contents_json = {}
        if lock_id != contents_json.get("lock_id"):
            raise GCSPseudoLockAlreadyExists(
                f"UUID {lock_id} does not match existing lock's UUID")

    no_regions_running = lock_manager.no_active_locks_with_prefix(
        GCS_TO_POSTGRES_INGEST_RUNNING_LOCK_NAME)
    if not no_regions_running:
        logging.info("Regions running, renqueuing this task.")
        task_id = "{}-{}-{}".format("renqueue_wait_task",
                                    str(datetime.utcnow().date()),
                                    uuid.uuid4())
        # Forward the lock id so the retried request is recognised as the
        # holder of the lock taken above.
        body = {"schema_type": schema_arg, "lock_id": lock_id}
        task_manager.job_monitor_cloud_task_queue_manager.create_task(
            task_id=task_id,
            body=body,
            relative_uri=
            f"/cloud_sql_to_bq/create_refresh_bq_tasks/{schema_arg}",
            schedule_delay_seconds=60,
        )
        return "", HTTPStatus.OK
    logging.info("No regions running, calling create_refresh_bq_tasks")
    create_all_bq_refresh_tasks_for_schema(schema_arg)
    return "", HTTPStatus.OK
def monitor_refresh_bq_tasks() -> Tuple[str, int]:
    """Worker function that finishes a schema's BQ export once its tasks drain.

    Reads "schema", "topic" and "message" from the JSON request body. While
    any task name in the BigQuery queue contains the schema, this monitor task
    is re-queued. Once none remain, the message is published to the topic (if
    a topic was given), the Postgres-to-BQ lock for the schema is released and
    the schedulers are kicked.

    Returns:
        An empty response body paired with HTTPStatus.OK.
    """
    payload = json.loads(request.get_data(as_text=True))
    schema = payload["schema"]
    topic = payload["topic"]
    message = payload["message"]

    task_manager = BQRefreshCloudTaskManager()

    # Only the part of each task name from "/tasks/" onward is searched for
    # the schema.
    queued_task_names = task_manager.get_bq_queue_info().task_names
    schema_tasks_pending = any(
        schema in queued_name[queued_name.find("/tasks/"):]
        for queued_name in queued_task_names
    )

    # Tasks for this schema are still queued: check again later.
    if schema_tasks_pending:
        logging.info(
            "Tasks still in bigquery queue. Re-queuing bq monitor task.")
        task_manager.create_bq_refresh_monitor_task(schema, topic, message)
        return "", HTTPStatus.OK

    # An empty topic means nothing needs to be notified.
    if topic:
        pubsub_helper.publish_message_to_topic(message=message, topic=topic)

    # All exports are done, so release the export lock.
    GCSPseudoLockManager().unlock(postgres_to_bq_lock_name_with_suffix(schema))
    logging.info(
        "Done running export for %s, unlocking Postgres to BigQuery export",
        schema)

    # Kick scheduler to restart ingest
    kick_all_schedulers()

    return "", HTTPStatus.OK
def create_all_bq_refresh_tasks_for_schema(schema_arg: str) -> None:
    """Creates a refresh task for each table to be exported, plus a monitor task.

    A task is created for each table returned by the schema's
    CloudSqlToBQConfig, followed by one monitor task. For the STATE schema the
    monitor task carries a Pub/Sub topic and message; for every other schema
    both are empty strings.

    Nothing is created when schema_arg is not a valid SchemaType value or when
    no CloudSqlToBQConfig exists for the schema; both cases are logged.

    Args:
        schema_arg: Schema name; upper-cased before being matched against
            SchemaType.
    """
    try:
        schema_type = SchemaType(schema_arg.upper())
    except ValueError:
        # Leave a trace instead of returning silently, so a bad schema
        # argument can be diagnosed from the logs.
        logging.warning(
            "Unexpected value for schema_arg: [%s]. No BQ refresh tasks "
            "created.", schema_arg)
        return

    logging.info("Beginning BQ export for %s schema tables.",
                 schema_type.value)

    task_manager = BQRefreshCloudTaskManager()

    cloud_sql_to_bq_config = CloudSqlToBQConfig.for_schema_type(schema_type)
    if cloud_sql_to_bq_config is None:
        logging.info("Cloud SQL to BQ is disabled for: %s", schema_type)
        return

    for table in cloud_sql_to_bq_config.get_tables_to_export():
        task_manager.create_refresh_bq_table_task(table.name, schema_type)

    if schema_type is SchemaType.STATE:
        pub_sub_topic = "v1.calculator.trigger_daily_pipelines"
        pub_sub_message = "State export to BQ complete"
    else:
        # Empty values: the monitor task has no topic to publish to.
        pub_sub_topic = ""
        pub_sub_message = ""

    task_manager.create_bq_refresh_monitor_task(schema_type.value,
                                                pub_sub_topic, pub_sub_message)
Exemple #5
0
    def test_reattempt_create_refresh_tasks_task(
        self, mock_client: mock.MagicMock, mock_uuid: mock.MagicMock
    ) -> None:
        """Checks the re-attempt task is created on the job monitor queue.

        The expected task is scheduled 60 seconds after the current time and
        posts the lock id to the create_refresh_bq_schema_task endpoint.
        NOTE(review): the fixed date in the expected task id presumably relies
        on a clock frozen outside this method - confirm.
        """
        # Arrange
        mock_uuid.uuid4.return_value = "random-uuid"

        schema = "fake_schema"
        lock_id = "fake_lock_id"

        queue_path = f"queue_path/{self.mock_project_id}/{QUEUES_REGION}"
        task_id = "reenqueue_wait_task-2019-04-13-random-uuid"
        task_path = f"{queue_path}/{task_id}"

        client = mock_client.return_value
        client.task_path.return_value = task_path
        client.queue_path.return_value = queue_path

        scheduled_seconds = int(datetime.datetime.now().timestamp()) + 60
        payload = {"lock_id": lock_id}
        expected_task = tasks_v2.types.task_pb2.Task(
            name=task_path,
            schedule_time=timestamp_pb2.Timestamp(seconds=scheduled_seconds),
            app_engine_http_request={
                "http_method": "POST",
                "relative_uri": "/cloud_sql_to_bq/create_refresh_bq_schema_task/fake_schema",
                "body": json.dumps(payload).encode(),
            },
        )

        # Act
        BQRefreshCloudTaskManager().create_reattempt_create_refresh_tasks_task(
            schema=schema, lock_id=lock_id
        )

        # Assert
        client.queue_path.assert_called_with(
            self.mock_project_id, QUEUES_REGION, JOB_MONITOR_QUEUE_V2
        )
        client.task_path.assert_called_with(
            self.mock_project_id, QUEUES_REGION, JOB_MONITOR_QUEUE_V2, task_id
        )
        client.create_task.assert_called_with(
            parent=queue_path, task=expected_task
        )
Exemple #6
0
def monitor_refresh_bq_tasks() -> Tuple[str, int]:
    """Worker function that publishes to Pub/Sub once the BigQuery queue is empty.

    Reads "topic" and "message" from the JSON request body. While the BigQuery
    queue still reports tasks, this monitor task is re-queued; once it is
    empty, the message is published to the topic.

    Returns:
        An empty response body paired with HTTPStatus.OK.
    """
    payload = json.loads(request.get_data(as_text=True))
    topic = payload['topic']
    message = payload['message']

    task_manager = BQRefreshCloudTaskManager()

    # Exports are still pending: schedule another check instead of publishing.
    if task_manager.get_bq_queue_info().size() > 0:
        logging.info(
            "Tasks still in bigquery queue. Re-queuing bq monitor task.")
        task_manager.create_bq_refresh_monitor_task(topic, message)
        return '', HTTPStatus.OK

    # The queue is empty, so announce completion.
    pubsub_helper.publish_message_to_topic(message=message, topic=topic)
    return '', HTTPStatus.OK
    def test_create_bq_refresh_monitor_task(self, mock_client: mock.MagicMock,
                                            mock_uuid: mock.MagicMock) -> None:
        """Checks the monitor task is created on the job monitor queue.

        The expected task is scheduled 60 seconds after the current time and
        posts the schema, topic and message to the monitor endpoint.
        NOTE(review): the fixed date in the expected task id presumably relies
        on a clock frozen outside this method - confirm.
        """
        # Arrange
        mock_uuid.uuid4.return_value = "random-uuid"

        schema = "schema"
        topic = "fake.topic"
        message = "A fake message"

        queue_path = f"queue_path/{self.mock_project_id}/{QUEUES_REGION}"
        task_id = "fake-topic-2019-04-13-random-uuid"
        task_path = f"{queue_path}/{task_id}"

        client = mock_client.return_value
        client.task_path.return_value = task_path
        client.queue_path.return_value = queue_path

        scheduled_seconds = int(datetime.datetime.now().timestamp()) + 60
        payload = {"schema": schema, "topic": topic, "message": message}
        expected_task = tasks_v2.types.task_pb2.Task(
            name=task_path,
            schedule_time=timestamp_pb2.Timestamp(seconds=scheduled_seconds),
            app_engine_http_request={
                "http_method": "POST",
                "relative_uri": "/cloud_sql_to_bq/monitor_refresh_bq_tasks",
                "body": json.dumps(payload).encode(),
            },
        )

        # Act
        BQRefreshCloudTaskManager().create_bq_refresh_monitor_task(
            schema=schema, topic=topic, message=message)

        # Assert
        client.queue_path.assert_called_with(
            self.mock_project_id, QUEUES_REGION, JOB_MONITOR_QUEUE_V2)
        client.task_path.assert_called_with(
            self.mock_project_id, QUEUES_REGION, JOB_MONITOR_QUEUE_V2, task_id)
        client.create_task.assert_called_with(
            parent=queue_path, task=expected_task)
Exemple #8
0
    def test_create_bq_refresh_monitor_task(self, mock_client: mock.MagicMock,
                                            mock_uuid: mock.MagicMock) -> None:
        """Checks the monitor task is created on the job monitor queue.

        The manager is built with an explicit project id. The expected task is
        scheduled 60 seconds after the current time and posts the topic and
        message to the monitor endpoint.
        NOTE(review): the fixed date in the expected task id presumably relies
        on a clock frozen outside this method - confirm.
        """
        # Arrange
        mock_uuid.uuid4.return_value = 'random-uuid'

        project_id = 'recidiviz-456'
        topic = 'fake.topic'
        message = 'A fake message'

        queue_path = f'queue_path/{project_id}/{QUEUES_REGION}'
        task_id = 'fake-topic-2019-04-13-random-uuid'
        task_path = f'{queue_path}/{task_id}'

        client = mock_client.return_value
        client.task_path.return_value = task_path
        client.queue_path.return_value = queue_path

        scheduled_seconds = int(datetime.datetime.now().timestamp()) + 60
        payload = {'topic': topic, 'message': message}
        expected_task = tasks_v2.types.task_pb2.Task(
            name=task_path,
            schedule_time=timestamp_pb2.Timestamp(seconds=scheduled_seconds),
            app_engine_http_request={
                'http_method': 'POST',
                'relative_uri': '/cloud_sql_to_bq/monitor_refresh_bq_tasks',
                'body': json.dumps(payload).encode()
            })

        # Act
        manager = BQRefreshCloudTaskManager(project_id=project_id)
        manager.create_bq_refresh_monitor_task(topic=topic, message=message)

        # Assert
        client.queue_path.assert_called_with(
            project_id, QUEUES_REGION, JOB_MONITOR_QUEUE_V2)
        client.task_path.assert_called_with(
            project_id, QUEUES_REGION, JOB_MONITOR_QUEUE_V2, task_id)
        client.create_task.assert_called_with(
            parent=queue_path, task=expected_task)
Exemple #9
0
    def test_create_refresh_bq_schema_task(
        self, mock_client: mock.MagicMock, mock_uuid: mock.MagicMock
    ) -> None:
        """Checks the schema refresh task is created on the BigQuery queue.

        The expected task has no schedule time and posts an empty JSON object
        to the refresh_bq_schema endpoint for JAILS.
        NOTE(review): the fixed date in the expected task id presumably relies
        on a clock frozen outside this method - confirm.
        """
        # Arrange
        mock_uuid.uuid4.return_value = "random-uuid"

        schema_value = SchemaType.JAILS.value
        queue_path = f"queue_path/{self.mock_project_id}/{QUEUES_REGION}"
        task_id = f"{schema_value}-2019-04-12-random-uuid"
        task_path = f"{queue_path}/{task_id}"

        client = mock_client.return_value
        client.task_path.return_value = task_path
        client.queue_path.return_value = queue_path

        expected_task = tasks_v2.types.task_pb2.Task(
            name=task_path,
            app_engine_http_request={
                "http_method": "POST",
                "relative_uri": "/cloud_sql_to_bq/refresh_bq_schema/JAILS",
                "body": json.dumps({}).encode(),
            },
        )

        # Act
        BQRefreshCloudTaskManager().create_refresh_bq_schema_task(
            schema_type=SchemaType.JAILS
        )

        # Assert
        client.queue_path.assert_called_with(
            self.mock_project_id, QUEUES_REGION, BIGQUERY_QUEUE_V2
        )
        client.task_path.assert_called_with(
            self.mock_project_id, QUEUES_REGION, BIGQUERY_QUEUE_V2, task_id
        )
        client.create_task.assert_called_with(
            parent=queue_path, task=expected_task
        )
Exemple #10
0
    def test_create_refresh_bq_table_task(self, mock_client: mock.MagicMock,
                                          mock_uuid: mock.MagicMock) -> None:
        """Checks the table refresh task is created on the BigQuery queue.

        The manager is built with an explicit project id. The expected task
        has no schedule time and posts the table name and schema type to the
        refresh_bq_table endpoint.
        NOTE(review): the fixed date in the expected task id presumably relies
        on a clock frozen outside this method - confirm.
        """
        # Arrange
        mock_uuid.uuid4.return_value = 'random-uuid'

        project_id = 'recidiviz-456'
        table_name = 'test_table'
        schema_value = SchemaType.JAILS.value

        queue_path = f'queue_path/{project_id}/{QUEUES_REGION}'
        task_id = f'test_table-{schema_value}-2019-04-12-random-uuid'
        task_path = f'{queue_path}/{task_id}'

        client = mock_client.return_value
        client.task_path.return_value = task_path
        client.queue_path.return_value = queue_path

        payload = {'table_name': table_name, 'schema_type': schema_value}
        expected_task = tasks_v2.types.task_pb2.Task(
            name=task_path,
            app_engine_http_request={
                'http_method': 'POST',
                'relative_uri': '/cloud_sql_to_bq/refresh_bq_table',
                'body': json.dumps(payload).encode()
            })

        # Act
        manager = BQRefreshCloudTaskManager(project_id=project_id)
        manager.create_refresh_bq_table_task(
            table_name=table_name, schema_type=SchemaType.JAILS)

        # Assert
        client.queue_path.assert_called_with(
            project_id, QUEUES_REGION, BIGQUERY_QUEUE_V2)
        client.task_path.assert_called_with(
            project_id, QUEUES_REGION, BIGQUERY_QUEUE_V2, task_id)
        client.create_task.assert_called_with(
            parent=queue_path, task=expected_task)
Exemple #11
0
def create_all_state_bq_refresh_tasks() -> Tuple[str, int]:
    """Enqueue one BigQuery refresh task per exportable state table.

    The list of tables comes from the CloudSqlToBQConfig built for
    SchemaType.STATE. After the per-table tasks, a monitor task is created
    carrying the Pub/Sub topic and message for the state export.

    Returns:
        An empty response body paired with HTTPStatus.OK.
    """
    logging.info("Beginning BQ export for state schema tables.")

    state_schema = SchemaType.STATE
    manager = BQRefreshCloudTaskManager()
    export_config = CloudSqlToBQConfig.for_schema_type(state_schema)

    for exported_table in export_config.get_tables_to_export():
        manager.create_refresh_bq_table_task(exported_table.name, state_schema)

    manager.create_bq_refresh_monitor_task(
        'v1.calculator.recidivism', 'State export to BQ complete')
    return '', HTTPStatus.OK
Exemple #12
0
def wait_for_ingest_to_create_tasks(schema_arg: str) -> Tuple[str, HTTPStatus]:
    """Worker function that triggers a BQ schema refresh, or retries later.

    After validating the schema argument, the refresh lock is acquired with
    this request's lock id. Then:
    * if the lock manager reports the refresh can proceed, a task to refresh
      the schema is created;
    * otherwise a re-attempt task carrying the same lock id is created.

    Args:
        schema_arg: Schema name; upper-cased before being matched against
            SchemaType.

    Returns:
        An error message with HTTPStatus.BAD_REQUEST when the schema argument
        is not a SchemaType value or is rejected by
        CloudSqlToBQConfig.is_valid_schema_type; otherwise an empty body with
        HTTPStatus.OK.
    """
    try:
        schema_type = SchemaType(schema_arg.upper())
    except ValueError:
        unknown_schema_message = (
            f"Unexpected value for schema_arg: [{schema_arg}]")
        return unknown_schema_message, HTTPStatus.BAD_REQUEST

    if not CloudSqlToBQConfig.is_valid_schema_type(schema_type):
        unsupported_message = f"Unsuppported schema type: [{schema_type}]"
        return unsupported_message, HTTPStatus.BAD_REQUEST

    lock_id = get_or_create_lock_id()
    logging.info("Request lock id: %s", lock_id)

    # Take the refresh lock before checking whether the refresh can proceed.
    refresh_locks = CloudSqlToBQLockManager()
    refresh_locks.acquire_lock(schema_type=schema_type, lock_id=lock_id)

    task_manager = BQRefreshCloudTaskManager()
    if refresh_locks.can_proceed(schema_type):
        logging.info("No regions running, triggering BQ refresh.")
        task_manager.create_refresh_bq_schema_task(schema_type=schema_type)
    else:
        logging.info("Regions running, renqueuing this task.")
        task_manager.create_reattempt_create_refresh_tasks_task(
            lock_id=lock_id, schema=schema_arg)
    return "", HTTPStatus.OK