def _get_database_name_for_state(state_code: StateCode,
                                 instance: DirectIngestInstance) -> str:
    """Returns the Postgres database name for the given state and ingest instance."""
    db_version = instance.database_version(SystemLevel.STATE,
                                           state_code=state_code)
    db_key = SQLAlchemyDatabaseKey.for_state_code(state_code, db_version)
    return db_key.db_name
 def _main_database_key(cls) -> "SQLAlchemyDatabaseKey":
     if cls.schema_type() == SchemaType.STATE:
         state_code = StateCode(cls.region_code().upper())
         return SQLAlchemyDatabaseKey.for_state_code(
             state_code,
             cls._main_ingest_instance().database_version(
                 SystemLevel.STATE, state_code=state_code),
         )
     return SQLAlchemyDatabaseKey.for_schema(cls.schema_type())
Example #3
0
    def test_for_state_code(self) -> None:
        """for_state_code yields distinct keys for PRIMARY and SECONDARY versions."""
        for db_version, expected_db_name in [
            (SQLAlchemyStateDatabaseVersion.PRIMARY, "us_mn_primary"),
            (SQLAlchemyStateDatabaseVersion.SECONDARY, "us_mn_secondary"),
        ]:
            actual_key = SQLAlchemyDatabaseKey.for_state_code(
                StateCode.US_MN, db_version=db_version)
            self.assertEqual(
                SQLAlchemyDatabaseKey(schema_type=SchemaType.STATE,
                                      db_name=expected_db_name),
                actual_key,
            )
Example #4
0
    def ingest_database_key(self) -> SQLAlchemyDatabaseKey:
        """Returns the key of the database this controller ingests into."""
        schema = self.system_level.schema_type()
        if schema != SchemaType.STATE:
            # Non-state schemas have exactly one database per schema.
            return SQLAlchemyDatabaseKey.for_schema(schema)

        code = StateCode(self.region_code().upper())
        version = self.ingest_instance.database_version(self.system_level,
                                                        state_code=code)
        return SQLAlchemyDatabaseKey.for_state_code(code, version)
Example #5
0
    def test_state_legacy_db(self) -> None:
        """The default STATE key equals both the canonical and LEGACY keys."""
        default_key = SQLAlchemyDatabaseKey(schema_type=SchemaType.STATE)
        canonical_key = SQLAlchemyDatabaseKey.canonical_for_schema(
            schema_type=SchemaType.STATE)
        self.assertEqual(default_key, canonical_key)

        # TODO(#7984): Once we have cut over all traffic to non-legacy state DBs and
        #  removed the LEGACY database version, remove this part of the test.
        legacy_key = SQLAlchemyDatabaseKey.for_state_code(
            StateCode.US_AK, SQLAlchemyStateDatabaseVersion.LEGACY)
        self.assertEqual(default_key, legacy_key)
Example #6
0
    def test_key_attributes_state(self) -> None:
        """Spot-checks the derived attributes of a STATE-schema database key."""
        key = SQLAlchemyDatabaseKey.for_state_code(
            StateCode.US_MN, db_version=SQLAlchemyStateDatabaseVersion.PRIMARY)

        self.assertEqual(key.declarative_meta, StateBase)

        alembic_file = key.alembic_file
        self.assertTrue(os.path.exists(alembic_file))
        self.assertTrue(alembic_file.endswith("migrations/state_alembic.ini"))

        migrations_dir = key.migrations_location
        self.assertTrue(os.path.exists(migrations_dir))
        self.assertTrue(migrations_dir.endswith("/migrations/state"))

        self.assertEqual(key.isolation_level, "SERIALIZABLE")
Example #7
0
    def _import_database_from_gcs() -> Tuple[str, HTTPStatus]:
        """Imports an exported state database from GCS into a Cloud SQL database.

        Reads the target state code and database versions from the request JSON,
        refuses LEGACY targets, checks that no ingest locks are held, then starts
        a Cloud SQL import and waits for it to complete.

        Returns a (message-or-operation-id, HTTP status) tuple.
        """
        try:
            state_code = StateCode(request.json["stateCode"])
            db_version = SQLAlchemyStateDatabaseVersion(
                request.json["importToDatabaseVersion"].lower())
            ingest_instance = DirectIngestInstance.for_state_database_version(
                database_version=db_version, state_code=state_code)
            exported_db_version = SQLAlchemyStateDatabaseVersion(
                request.json["exportedDatabaseVersion"].lower())
        except ValueError:
            # Raised by the enum constructors / instance lookup on bad input.
            return "invalid parameters provided", HTTPStatus.BAD_REQUEST

        if db_version == SQLAlchemyStateDatabaseVersion.LEGACY:
            return "ingestInstance cannot be LEGACY", HTTPStatus.BAD_REQUEST

        lock_manager = DirectIngestRegionLockManager.for_state_ingest(
            state_code, ingest_instance=ingest_instance)
        if not lock_manager.can_proceed():
            return (
                "other locks blocking ingest have been acquired; aborting operation",
                HTTPStatus.CONFLICT,
            )

        db_key = SQLAlchemyDatabaseKey.for_state_code(state_code, db_version)
        cloud_sql_client = CloudSQLClientImpl(project_id=project_id)

        operation_id = cloud_sql_client.import_gcs_sql(
            db_key,
            GcsfsFilePath.from_absolute_path(
                f"{STATE_INGEST_EXPORT_URI}/{exported_db_version.value}/{state_code.value}"
            ),
        )
        if operation_id is None:
            return (
                "Cloud SQL import operation was not started successfully.",
                HTTPStatus.INTERNAL_SERVER_ERROR,
            )

        operation_succeeded = cloud_sql_client.wait_until_operation_completed(
            operation_id, seconds_to_wait=GCS_IMPORT_EXPORT_TIMEOUT_SEC)
        if not operation_succeeded:
            # Fix: the message previously hardcoded "60 seconds", which would
            # silently drift from the actual GCS_IMPORT_EXPORT_TIMEOUT_SEC value.
            return (
                f"Cloud SQL import did not complete within "
                f"{GCS_IMPORT_EXPORT_TIMEOUT_SEC} seconds",
                HTTPStatus.INTERNAL_SERVER_ERROR,
            )

        return operation_id, HTTPStatus.OK
Example #8
0
    def test_build_view_state_legacy(self) -> None:
        """Builds a federated Cloud SQL BigQuery view over the state_person table
        for a LEGACY state database and verifies the generated query,
        description, and view/materialized addresses."""
        # Look up the single state_person table from the STATE schema metadata.
        table = one(t for t in StateBase.metadata.sorted_tables
                    if t.name == "state_person")
        view_builder = FederatedCloudSQLTableBigQueryViewBuilder(
            connection_region="us-east2",
            table=table,
            view_id=table.name,
            cloud_sql_query="SELECT * FROM state_person;",
            database_key=SQLAlchemyDatabaseKey.for_state_code(
                StateCode.US_XX,
                db_version=SQLAlchemyStateDatabaseVersion.LEGACY),
            materialized_address_override=BigQueryAddress(
                dataset_id="materialized_dataset",
                table_id="materialized_table"),
        )
        # NOTE: the trailing spaces inside this triple-quoted string are part of
        # the expected output — do not strip them.
        expected_description = """View providing a connection to the [state_person]
table in the [postgres] database in the [STATE] schema. This view is 
managed outside of regular view update operations and the results can be found in the 
schema-specific datasets (`state`, `jails`, `justice_counts`, etc)."""

        expected_view_query = f"""/*{expected_description}*/
SELECT
    *
FROM EXTERNAL_QUERY(
    "test-project.us-east2.state_cloudsql",
    "SELECT * FROM state_person;"
)"""

        # Build without dataset overrides
        view = view_builder.build()

        self.assertIsInstance(view, FederatedCloudSQLTableBigQueryView)
        self.assertEqual(expected_view_query, view.view_query)
        self.assertEqual(expected_description, view.description)
        self.assertEqual(
            BigQueryAddress(
                dataset_id="state_cloudsql_connection",
                table_id="state_person",
            ),
            view.address,
        )
        self.assertEqual(
            BigQueryAddress(dataset_id="materialized_dataset",
                            table_id="materialized_table"),
            view.materialized_address,
        )
Example #9
0
def main(state_code: StateCode, ingest_instance: DirectIngestInstance) -> None:
    """Prints the psql commands needed to delete all data for a region.

    This function does not execute anything against a database itself — it
    emits commands for an operator to paste into a postgres shell, followed by
    step-by-step instructions for doing so.
    """
    print(
        f"RUN THE FOLLOWING COMMANDS IN ORDER TO DELETE ALL DATA FOR REGION [{state_code.value}]"
    )
    print(
        "********************************************************************************"
    )
    # Resolve which database version (e.g. primary/secondary) this instance
    # writes to, so the commands target the correct database.
    db_version = ingest_instance.database_version(SystemLevel.STATE,
                                                  state_code)
    db_key = SQLAlchemyDatabaseKey.for_state_code(state_code=state_code,
                                                  db_version=db_version)

    # Connect to correct database for instance first
    print(f"\\c {db_key.db_name}")

    # Then run deletion commands
    for cmd in generate_region_deletion_commands(state_code, db_version):
        print(cmd)

    print(
        "********************************************************************************"
    )
    print("HOW TO PERFORM DELETION:")
    print(
        "1) Log into prod data client (`gcloud compute ssh prod-data-client --project=recidiviz-123`)"
    )
    print("\n> For production deletion:")
    print(
        "2) Go to secret manager to get login credentials stored in `state_db_user` and `state_db_password` secrets:"
        "\n\thttps://console.cloud.google.com/security/secret-manager?organizationId=448885369991&"
        "project=recidiviz-123")
    print("3) Log into postgres database (`prod-state-psql`)")
    print("\n> For staging deletion:")
    print(
        "2) Go to secret manager to get login credentials stored in `state_db_user` and `state_db_password` secrets:"
        "\n\thttps://console.cloud.google.com/security/secret-manager?organizationId=448885369991&"
        "project=recidiviz-staging")
    print("3) Log into postgres database (`dev-state-psql`)")
    print("\n> For all:")
    print(
        "4) Paste full list of commands listed above in postgres command line and run. Some commands may take a "
        "while to run.")
Example #10
0
    def database_key_for_segment(
            self, state_code: StateCode) -> SQLAlchemyDatabaseKey:
        """Returns a key for the database associated with a particular state segment.
        Throws for unsegmented schemas.
        """
        if not self.is_state_segmented_refresh_schema():
            raise ValueError(
                f"Only expect state-segmented schemas, found [{self.schema_type}]"
            )

        if self.schema_type != SchemaType.STATE:
            # Segmented non-STATE schemas share one database per schema.
            return SQLAlchemyDatabaseKey.for_schema(self.schema_type)

        if not self.direct_ingest_instance:
            raise ValueError(
                "Expected DirectIngestInstance to be non-None for STATE schema."
            )
        db_version = self.direct_ingest_instance.database_version(
            SystemLevel.STATE, state_code=state_code)
        return SQLAlchemyDatabaseKey.for_state_code(state_code=state_code,
                                                    db_version=db_version)
Example #11
0
def main(
    state_code: StateCode,
    database_version: SQLAlchemyStateDatabaseVersion,
    ssl_cert_path: str,
    purge_schema: bool,
) -> None:
    """
    Invokes the main code path for running a downgrade.

    This checks for user validations that the database and branches are correct and then runs the downgrade
    migration.

    Args:
        state_code: The state whose data will be purged.
        database_version: Target database version; LEGACY is rejected.
        ssl_cert_path: Path to the SSL certs used to reach the prod data client.
        purge_schema: If True, additionally run all alembic downgrade
            migrations and drop the alembic_version table.
    """
    # TODO(#7984): Once we have cut all traffic over to single-database traffic,
    #   delete this branch.
    if database_version == SQLAlchemyStateDatabaseVersion.LEGACY:
        logging.error(
            "Should not invoke purge_state_db script with legacy database version."
        )
        sys.exit(1)

    is_prod = metadata.project_id() == GCP_PROJECT_PRODUCTION
    if is_prod:
        logging.info("RUNNING AGAINST PRODUCTION\n")

    purge_str = ("PURGE DATABASE STATE IN STAGING" if metadata.project_id()
                 == GCP_PROJECT_STAGING else "PURGE DATABASE STATE IN PROD")
    # Fix: removed duplicated word ("for for") in the confirmation prompt.
    prompt_for_confirmation(
        f"This script will PURGE all data for [{state_code.value}] in DB [{database_version.value}].",
        purge_str,
    )
    if purge_schema:
        purge_schema_str = ("RUN DOWNGRADE MIGRATIONS IN STAGING"
                            if metadata.project_id() == GCP_PROJECT_STAGING
                            else "RUN DOWNGRADE MIGRATIONS IN PROD")
        prompt_for_confirmation(
            f"This script will run all DOWNGRADE migrations for "
            f"[{state_code.value}] in DB [{database_version.value}].",
            purge_schema_str,
        )

    db_key = SQLAlchemyDatabaseKey.for_state_code(state_code, database_version)

    # First, truncate the root tables; CASCADE wipes everything that references
    # them.
    with SessionFactory.for_prod_data_client(db_key, ssl_cert_path) as session:
        truncate_commands = [
            "TRUNCATE TABLE state_person CASCADE;",
            "TRUNCATE TABLE state_agent CASCADE;",
        ]
        for command in truncate_commands:
            logging.info('Running query ["%s"]. This may take a while...',
                         command)
            session.execute(command)

        logging.info("Done running truncate commands.")

    if purge_schema:
        with SessionFactory.for_prod_data_client(
                db_key, ssl_cert_path) as purge_session:
            overridden_env_vars = None
            try:
                logging.info("Purging schema...")
                # Migrations must run as the migration user; temporarily point
                # the SQLAlchemy env vars at it.
                overridden_env_vars = SQLAlchemyEngineManager.update_sqlalchemy_env_vars(
                    database_key=db_key,
                    ssl_cert_path=ssl_cert_path,
                    migration_user=True,
                )
                config = alembic.config.Config(db_key.alembic_file)
                alembic.command.downgrade(config, "base")

                # We need to manually delete alembic_version because it's leftover after
                # the downgrade migrations
                purge_session.execute("DROP TABLE alembic_version;")
            finally:
                if overridden_env_vars:
                    local_postgres_helpers.restore_local_env_vars(
                        overridden_env_vars)

        logging.info("Purge complete.")