    def testInitEngines_usesCorrectIsolationLevelsInStaging(
            self, mock_get_secret, mock_in_gae, mock_in_staging,
            mock_create_engine):
        # Arrange
        mock_in_gae.return_value = True
        mock_in_staging.return_value = True
        # Pretend all secret values are just the key suffixed with '_value'
        mock_get_secret.side_effect = lambda key: f'{key}_value'

        # Act
        SQLAlchemyEngineManager.init_engines_for_server_postgres_instances()

        # Assert
        assert mock_create_engine.call_args_list == [
            call(
                'postgresql://*****:*****@/sqlalchemy_db_name_value'
                '?host=/cloudsql/cloudsql_instance_id_value',
                isolation_level=None,
                pool_recycle=600),
            call(
                'postgresql://*****:*****@/state_db_name_value'
                '?host=/cloudsql/state_cloudsql_instance_id_value',
                isolation_level='SERIALIZABLE',
                pool_recycle=600),
            call(
                'postgresql://*****:*****@/operations_db_name_value'
                '?host=/cloudsql/operations_cloudsql_instance_id_value',
                isolation_level=None,
                pool_recycle=600),
            call(
                'postgresql://*****:*****@/'
                'justice_counts_db_name_value?host=/cloudsql/justice_counts_cloudsql_instance_id_value',
                isolation_level='SERIALIZABLE',
                pool_recycle=600),
        ]
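The mock parameters imply a stack of @patch decorators above this method. A minimal sketch of that stack, assuming patch targets inferred from the mock names (the exact module paths are assumptions, not the repo's actual decorators); decorators apply bottom-up, so the decorator closest to the method supplies the first mock argument:

from unittest.mock import patch

# Assumed patch targets -- not the repo's actual decorator stack.
@patch("sqlalchemy.create_engine")                  # -> mock_create_engine
@patch("recidiviz.utils.environment.in_staging")    # -> mock_in_staging
@patch("recidiviz.utils.environment.in_gae")        # -> mock_in_gae
@patch("recidiviz.utils.secrets.get_secret")        # -> mock_get_secret
def testInitEngines_usesCorrectIsolationLevelsInStaging(
        self, mock_get_secret, mock_in_gae, mock_in_staging, mock_create_engine):
    ...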
Example #2
def main(database: SchemaType, repo_root: str, ssl_cert_path: str) -> None:
    """
    Invokes the main code path for running a downgrade.

    This prompts the user to confirm that the database and branch are correct, then
    runs the downgrade migration.
    """

    is_prod = metadata.project_id() == GCP_PROJECT_PRODUCTION
    if is_prod:
        logging.info("RUNNING AGAINST PRODUCTION\n")

    prompt_for_confirmation("This script will run a DOWNGRADE migration.", "DOWNGRADE")
    confirm_correct_db(database)
    confirm_correct_git_branch(repo_root, is_prod=is_prod)

    overridden_env_vars = SQLAlchemyEngineManager.update_sqlalchemy_env_vars(
        database,
        ssl_cert_path=ssl_cert_path,
        migration_user=True,
    )

    # Run downgrade
    try:
        config = alembic.config.Config(
            SQLAlchemyEngineManager.get_alembic_file(database)
        )
        alembic.command.downgrade(config, "-1")
    except Exception as e:
        logging.error("Downgrade failed to run: %s", e)
        sys.exit(1)
    finally:
        local_postgres_helpers.restore_local_env_vars(overridden_env_vars)
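A hypothetical command-line entrypoint for this script; the flag names are assumptions, not the repo's actual parser. Note that alembic.command.downgrade(config, "-1") steps the schema back exactly one revision.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run a single DOWNGRADE migration.")
    parser.add_argument("--database", required=True, help="A SchemaType name, e.g. STATE")
    parser.add_argument("--repo-root", required=True)
    parser.add_argument("--ssl-cert-path", required=True)
    args = parser.parse_args()
    # SchemaType is an Enum, so lookup by member name works.
    main(SchemaType[args.database], args.repo_root, args.ssl_cert_path)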
Example #3
    def default_config(self) -> Dict[str, str]:
        return {
            'file': SQLAlchemyEngineManager.get_alembic_file(self.schema_type),
            'script_location': SQLAlchemyEngineManager.get_migrations_location(
                self.schema_type),
        }
Example #4
def teardown_in_memory_sqlite_databases():
    """Cleans up state after a test started with use_in_memory_sqlite_database() is complete."""
    if environment.in_gae():
        raise ValueError('Running test-only code in Google App Engine.')

    global _in_memory_sqlite_connection_thread_ids
    _in_memory_sqlite_connection_thread_ids.clear()
    SQLAlchemyEngineManager.teardown_engines()
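A minimal sketch of how the setup and teardown helpers pair inside a unittest.TestCase; JailsBase is a placeholder for whichever declarative base the test targets.

import unittest

class MyPersistenceTest(unittest.TestCase):
    def setUp(self) -> None:
        use_in_memory_sqlite_database(JailsBase)  # placeholder declarative base

    def tearDown(self) -> None:
        # Clears the thread-id guard and tears down all engines.
        teardown_in_memory_sqlite_databases()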
Example #5
def use_on_disk_sqlite_database(declarative_base: DeclarativeMeta) -> None:
    """Creates a new SqlDatabase object used to communicate to an on-disk
    sqlite database.

    This includes:
    1. Creates a new on-disk sqlite database engine
    2. Create all tables in the newly created sqlite database
    3. Bind the global SessionMaker to the new database engine
    """
    SQLAlchemyEngineManager.init_engine_for_db_instance(
        db_url='sqlite:///recidiviz.db', schema_base=declarative_base)
Example #6
def use_in_memory_sqlite_database(declarative_base: DeclarativeMeta) -> None:
    """Creates a new SqlDatabase object used to communicate to a fake in-memory
    sqlite database.

    This includes:
    1. Creates a new in memory sqlite database engine
    2. Create all tables in the newly created sqlite database
    3. Bind the global SessionMaker to the new fake database engine
    """

    SQLAlchemyEngineManager.init_engine_for_db_instance(
        db_url='sqlite:///:memory:', schema_base=declarative_base)
Example #7
def use_on_disk_postgresql_database(declarative_base: DeclarativeMeta) -> None:
    """Connects SQLAlchemy to a local test postgres server. Should be called after the test database and user have
    already been initialized.

    This includes:
    1. Creating all tables in the newly created postgres database
    2. Binding the global SessionMaker to the new database engine
    """
    if declarative_base not in DECLARATIVE_BASES:
        raise ValueError(f"Unexpected declarative base: {declarative_base}.")

    SQLAlchemyEngineManager.init_engine_for_postgres_instance(
        db_url=on_disk_postgres_db_url(), schema_base=declarative_base)
Example #8
def use_on_disk_sqlite_database(declarative_base: DeclarativeMeta) -> None:
    """Creates a new SqlDatabase object used to communicate to an on-disk
    sqlite database.

    This includes:
    1. Creates a new on-disk sqlite database engine
    2. Create all tables in the newly created sqlite database
    3. Bind the global SessionMaker to the new database engine
    """
    if environment.in_gae():
        raise ValueError('Running test-only code in Google App Engine.')

    SQLAlchemyEngineManager.init_engine_for_db_instance(
        db_url='sqlite:///recidiviz.db', schema_base=declarative_base)
Example #9
    def testInitEngines_usesCorrectIsolationLevelsInStaging(self, mock_in_gae, mock_in_staging, mock_create_engine):
        # Arrange
        mock_in_gae.return_value = True
        mock_in_staging.return_value = True

        # Act
        SQLAlchemyEngineManager.init_engines_for_server_postgres_instances()

        # Assert
        assert mock_create_engine.call_args_list == [
            call('path', isolation_level=None, pool_recycle=600),
            call('path', isolation_level='SERIALIZABLE', pool_recycle=600),
            call('path', isolation_level=None, pool_recycle=600),
        ]
Example #10
def stop_and_clear_on_disk_postgresql_database() -> None:
    """Drops all tables in the local postgres database and stops the postgres server. Should be called in the
    tearDownClass function so this only runs once per test class."""
    if environment.in_gae():
        raise ValueError('Running test-only code in Google App Engine.')

    for declarative_base in DECLARATIVE_BASES:
        use_on_disk_postgresql_database(declarative_base)
        declarative_base.metadata.drop_all(SQLAlchemyEngineManager.get_engine_for_schema_base(declarative_base))
        SQLAlchemyEngineManager.teardown_engine_for_schema(declarative_base)

    if not environment.in_travis():
        # If we are running locally, we must stop the local postgres server
        os.system('pg_ctl -D /usr/local/var/postgres stop &> /dev/null')
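A sketch of the class-level lifecycle this helper expects, built from the local_postgres_helpers functions that appear elsewhere on this page; OperationsBase is a placeholder declarative base.

import unittest

class MyPostgresTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls) -> None:
        # Boots a throwaway local postgres server once per test class.
        local_postgres_helpers.start_on_disk_postgresql_database()

    def setUp(self) -> None:
        use_on_disk_postgresql_database(OperationsBase)  # placeholder base

    def tearDown(self) -> None:
        # Clears (but does not drop) tables between tests; see Example #26.
        teardown_on_disk_postgresql_database(OperationsBase)

    @classmethod
    def tearDownClass(cls) -> None:
        # Drops all tables and stops the local server.
        stop_and_clear_on_disk_postgresql_database()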
Example #11
def create_export_context(
    schema_type: SchemaType, export_uri: str, export_query: str
) -> dict:
    """Creates the exportContext configuration for the export operation.

    See here for details:
    https://cloud.google.com/sql/docs/postgres/admin-api/v1beta4/instances/export

    Args:
        schema_type: The SchemaType of the table being exported.
        export_uri: GCS URI to write the exported CSV data to.
        export_query: SQL query defining the data to be exported.

    Returns:
        export_context dict which can be passed to client.instances.export().
    """

    export_context = {
        "exportContext": {
            "kind": "sql#exportContext",
            "fileType": "CSV",
            "uri": export_uri,
            "databases": [SQLAlchemyEngineManager.get_db_name(schema_type)],
            "csvExportOptions": {"selectQuery": export_query},
        }
    }

    return export_context
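A hedged sketch of how the returned dict might be handed to the Cloud SQL Admin API through the Google discovery client (see the URL above); the project and instance names are placeholders.

from googleapiclient import discovery

client = discovery.build("sqladmin", "v1beta4")
body = create_export_context(
    schema_type=SchemaType.STATE,
    export_uri="gs://my-bucket/state_export.csv",
    export_query="SELECT * FROM some_table",
)
# instances().export() starts an asynchronous export operation.
operation = client.instances().export(
    project="my-project", instance="my-instance", body=body
).execute()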
Example #12
    def setUp(self) -> None:
        self.metadata_patcher = mock.patch(
            "recidiviz.utils.metadata.project_id")
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = "recidiviz-staging"

        test_secrets = {
            # pylint: disable=protected-access
            SQLAlchemyEngineManager._get_cloudsql_instance_id_key(schema_type):
            f"test-project:us-east2:{schema_type.value}-data"
            for schema_type in SchemaType
        }
        self.get_secret_patcher = mock.patch(
            "recidiviz.utils.secrets.get_secret")

        self.get_secret_patcher.start().side_effect = test_secrets.get

        self.gcs_factory_patcher = mock.patch(
            "recidiviz.admin_panel.dataset_metadata_store.GcsfsFactory.build")

        self.fake_fs = FakeGCSFileSystem()
        self.gcs_factory_patcher.start().return_value = self.fake_fs

        self.fake_config_path = GcsfsFilePath.from_absolute_path(
            "gs://recidiviz-staging-configs/cloud_sql_to_bq_config.yaml")
Example #13
    def for_prod_data_client(
        cls,
        database_key: SQLAlchemyDatabaseKey,
        ssl_cert_path: str,
        *,
        autocommit: bool = True,
    ) -> Iterator[Session]:
        """Implements a context manager for db sessions for use in prod-data-client."""
        engine = SQLAlchemyEngineManager.get_engine_for_database_with_ssl_certs(
            database_key=database_key, ssl_cert_path=ssl_cert_path)
        if engine is None:
            raise ValueError(f"No engine set for key [{database_key}]")

        try:
            session = Session(bind=engine)
            cls._alter_session_variables(session)
            cls._apply_session_listener_for_schema_base(
                database_key.declarative_meta, session)
            yield session
            if autocommit:
                try:
                    session.commit()
                except Exception as e:
                    session.rollback()
                    raise e
        finally:
            session.close()
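A usage sketch, assuming the method is wrapped with @contextlib.contextmanager (it yields a Session); the schema and cert path are placeholders.

with SessionFactory.for_prod_data_client(
    database_key=SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE),
    ssl_cert_path="/path/to/prod_data_certs",
) as session:
    session.execute("SELECT 1")
# With autocommit=True (the default), the session commits on a clean exit
# and rolls back if that commit raises.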
Example #14
def export_from_cloud_sql_to_gcs_csv(
    schema_type: SchemaType,
    table_name: str,
    gcs_uri: GcsfsFilePath,
    columns: List[str],
) -> None:
    cloud_sql_client = CloudSQLClientImpl()
    instance_name = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
        schema_type)
    if instance_name is None:
        raise ValueError("Could not find instance name.")
    operation_id = cloud_sql_client.export_to_gcs_csv(
        instance_name=instance_name,
        table_name=table_name,
        gcs_uri=gcs_uri,
        columns=columns,
    )
    if operation_id is None:
        raise RuntimeError(
            "Cloud SQL export operation was not started successfully.")

    operation_succeeded = cloud_sql_client.wait_until_operation_completed(
        operation_id)

    if not operation_succeeded:
        raise RuntimeError("Cloud SQL export failed.")
Example #15
def create_export_context(schema_type: SchemaType, export_uri: str,
                          export_query: str) -> dict:
    """Creates the exportContext configuration for the export operation.

    See here for details:
    https://cloud.google.com/sql/docs/postgres/admin-api/v1beta4/instances/export

    Args:
        schema_type: The schema of the table being exported, either
            SchemaType.JAILS or SchemaType.STATE.
        export_uri: GCS URI to write the exported CSV data to.
        export_query: SQL query defining the data to be exported.

    Returns:
        export_context dict which can be passed to client.instances.export().
    """

    export_context = {
        'exportContext': {
            'kind': 'sql#exportContext',
            'fileType': 'CSV',
            'uri': export_uri,
            'databases': [SQLAlchemyEngineManager.get_db_name(schema_type)],
            'csvExportOptions': {
                'selectQuery': export_query
            }
        }
    }

    return export_context
Example #16
    def testGetAllStrippedCloudSqlInstanceIds(
            self, mock_secrets: mock.MagicMock) -> None:
        # Arrange
        mock_secrets.side_effect = [
            "project:region:111",
            "project:region:222",
            "project:region:333",
            "project:region:444",
            "project:region:555",
        ]

        # Act
        ids = SQLAlchemyEngineManager.get_all_stripped_cloudsql_instance_ids()

        # Assert
        self.assertEqual(ids, ["111", "222", "333", "444", "555"])
        mock_secrets.assert_has_calls(
            [
                mock.call("sqlalchemy_cloudsql_instance_id"),
                mock.call("state_cloudsql_instance_id"),
                mock.call("operations_cloudsql_instance_id"),
                mock.call("justice_counts_cloudsql_instance_id"),
                mock.call("case_triage_cloudsql_instance_id"),
            ],
            any_order=True,
        )
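The stripping this test exercises appears to keep only the segment after the last colon of a fully qualified project:region:instance id; a one-line sketch of that assumption:

def strip_instance_id(full_instance_id: str) -> str:
    # "project:region:111" -> "111"
    return full_instance_id.split(":")[-1]

assert strip_instance_id("project:region:111") == "111"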
Example #17
    def test_enums_match_schema(self):
        with runner(self.default_config()) as r:
            r.migrate_up_to('head')

        # Fetch enum values
        migration_enums = self.fetch_all_enums()

        # Doing teardown/setup to generate a new postgres instance
        local_postgres_helpers.restore_local_env_vars(self.overridden_env_vars)
        local_postgres_helpers.stop_and_clear_on_disk_postgresql_database(
            self.db_dir)

        self.db_dir = local_postgres_helpers.start_on_disk_postgresql_database()
        self.overridden_env_vars = (
            local_postgres_helpers.update_local_sqlalchemy_postgres_env_vars())

        local_postgres_helpers.use_on_disk_postgresql_database(
            SQLAlchemyEngineManager.declarative_method_for_schema(
                self.schema_type))

        # Check enum values
        schema_enums = self.fetch_all_enums()

        # Assert that they all match
        self.assertEqual(len(migration_enums), len(schema_enums))
        for enum_name, migration_values in migration_enums.items():
            schema_values = schema_enums[enum_name]
            self.assertEqual(len(migration_values),
                             len(schema_values),
                             msg=f'{enum_name} lengths differ')
            self.assertEqual(len(migration_values),
                             len(migration_values.intersection(schema_values)),
                             msg=f'{enum_name} values differ')
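One plausible shape for the fetch_all_enums() helper this test relies on, reading enum labels from the Postgres catalog; this is an assumption, not the repo's implementation.

from collections import defaultdict
from typing import Dict, Set

def fetch_all_enums(session) -> Dict[str, Set[str]]:
    """Maps each user-defined enum type name to its set of labels."""
    rows = session.execute(
        "SELECT t.typname, e.enumlabel "
        "FROM pg_type t JOIN pg_enum e ON t.oid = e.enumtypid"
    )
    enums: Dict[str, Set[str]] = defaultdict(set)
    for type_name, label in rows:
        enums[type_name].add(label)
    return enums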
Example #18
    def testInitEngines_usesCorrectIsolationLevels(self, mock_get_secret,
                                                   mock_in_gcp,
                                                   mock_in_production,
                                                   mock_create_engine):
        # Arrange
        mock_in_gcp.return_value = True
        mock_in_production.return_value = True
        # Pretend all secret values are just the key suffixed with '_value'
        mock_get_secret.side_effect = lambda key: f"{key}_value"

        # Act
        SQLAlchemyEngineManager.init_engines_for_server_postgres_instances()

        # Assert
        assert mock_create_engine.call_args_list == [
            call(
                "postgresql://*****:*****@/sqlalchemy_db_name_value"
                "?host=/cloudsql/sqlalchemy_cloudsql_instance_id_value",
                isolation_level=None,
                pool_recycle=600,
            ),
            call(
                "postgresql://*****:*****@/state_db_name_value"
                "?host=/cloudsql/state_cloudsql_instance_id_value",
                isolation_level="SERIALIZABLE",
                pool_recycle=600,
            ),
            call(
                "postgresql://*****:*****@/operations_db_name_value"
                "?host=/cloudsql/operations_cloudsql_instance_id_value",
                isolation_level=None,
                pool_recycle=600,
            ),
            call(
                "postgresql://*****:*****@/"
                "justice_counts_db_name_value?host=/cloudsql/justice_counts_cloudsql_instance_id_value",
                isolation_level="SERIALIZABLE",
                pool_recycle=600,
            ),
            call(
                "postgresql://*****:*****@/"
                "case_triage_db_name_value?host=/cloudsql/case_triage_cloudsql_instance_id_value",
                isolation_level=None,
                pool_recycle=600,
            ),
        ]
Example #19
def setup_scoped_sessions(app: Flask, db_url: URL) -> Engine:
    engine = SQLAlchemyEngineManager.init_engine_for_postgres_instance(
        database_key=SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE),
        db_url=db_url,
    )
    session_factory = sessionmaker(bind=engine)
    app.scoped_session = flask_scoped_session(session_factory, app)
    return engine
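A minimal wiring sketch; the connection values are placeholders, and URL.create requires SQLAlchemy 1.4+. flask_scoped_session ties each session's lifetime to the Flask request context, so handlers can reach it via app.scoped_session.

from flask import Flask
from sqlalchemy.engine.url import URL

app = Flask(__name__)
db_url = URL.create(
    drivername="postgresql",
    username="usr",
    host="localhost",
    database="recidiviz_test",
)
engine = setup_scoped_sessions(app, db_url)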
Example #20
def use_on_disk_postgresql_database(declarative_base: DeclarativeMeta) -> None:
    """Creates a new SqlDatabase object used to communicate to an on-disk
    PostgreSQL database.

    This includes:
    1. Create all tables in the newly created sqlite database
    2. Bind the global SessionMaker to the new database engine

    Note, this assumes the following is true:
    - A local postgres server has been started
      (`pg_ctl -D /usr/local/var/postgres start`)
    - A DB named `recidiviz_test` exists (`createdb recidiviz_test`)
    - A User `usr` has been created with no password (`createdb usr`)
    """
    url = 'postgresql://usr:@localhost:5432/recidiviz_test'
    SQLAlchemyEngineManager.init_engine_for_db_instance(
        db_url=url, schema_base=declarative_base)
Example #21
def main(database: SchemaType, message: str, use_local_db: bool) -> None:
    """Runs the script to autogenerate migrations."""
    if use_local_db:
        # TODO(#4619): We should eventually move this from a local postgres instance to running
        # postgres from a docker container.
        if not local_postgres_helpers.can_start_on_disk_postgresql_database():
            logging.error(
                "pg_ctl is not installed, so the script cannot be run locally. "
                "--project-id must be specified to run against staging or production."
            )
            logging.error("Exiting...")
            sys.exit(1)
        logging.info("Starting local postgres database for autogeneration...")
        tmp_db_dir = local_postgres_helpers.start_on_disk_postgresql_database()
        original_env_vars = (
            local_postgres_helpers.update_local_sqlalchemy_postgres_env_vars())
    else:
        # TODO(Recidiviz/zenhub-tasks#134): This code path will throw when pointed at staging
        # because we haven't created valid read-only users there just yet.
        try:
            original_env_vars = SQLAlchemyEngineManager.update_sqlalchemy_env_vars(
                database, readonly_user=True)
        except ValueError as e:
            logging.warning("Error fetching SQLAlchemy credentials: %s", e)
            logging.warning(
                "Until readonly users are created, we cannot autogenerate migrations against staging."
            )
            logging.warning(
                "See https://github.com/Recidiviz/zenhub-tasks/issues/134")
            sys.exit(1)

    try:
        config = alembic.config.Config(
            SQLAlchemyEngineManager.get_alembic_file(database))
        if use_local_db:
            upgrade(config, "head")
        revision(config, autogenerate=True, message=message)
    except Exception as e:
        logging.error("Automigration generation failed: %s", e)

    local_postgres_helpers.restore_local_env_vars(original_env_vars)
    if use_local_db:
        logging.info("Stopping local postgres database...")
        local_postgres_helpers.stop_and_clear_on_disk_postgresql_database(
            tmp_db_dir)
Example #22
def confirm_correct_db(database: SchemaType) -> None:
    dbname = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
        database)
    if dbname is None:
        logging.error("Could not find database instance.")
        logging.error("Exiting...")
        sys.exit(1)

    prompt_for_confirmation(f"Running migrations on {dbname}.", dbname)
Example #23
def use_on_disk_postgresql_database(declarative_base: DeclarativeMeta) -> None:
    """Connects SQLAlchemy to a local test postgres server. Should be called after the test database and user have
    already been initialized. In Travis, these are set up via commands in the .travis.yaml, locally, you must have
    first called start_on_disk_postgresql_database().

    This includes:
    1. Create all tables in the newly created sqlite database
    2. Bind the global SessionMaker to the new database engine
    """
    if environment.in_gae():
        raise ValueError('Running test-only code in Google App Engine.')

    if declarative_base not in DECLARATIVE_BASES:
        raise ValueError(f"Unexpected declarative base: {declarative_base}.")

    SQLAlchemyEngineManager.init_engine_for_db_instance(
        db_url=f'postgresql://{TEST_POSTGRES_USER_NAME}:@localhost:5432/{TEST_POSTGRES_DB_NAME}',
        schema_base=declarative_base)
Example #24
    def for_schema_base(cls, schema_base: DeclarativeMeta) -> Session:
        engine = SQLAlchemyEngineManager.get_engine_for_schema_base(
            schema_base)
        if engine is None:
            raise ValueError(
                f"No engine set for base [{schema_base.__name__}]")

        session = Session(bind=engine)
        cls._apply_session_listener_for_schema_base(schema_base, session)
        return session
Example #25
def update_long_term_backups() -> Tuple[str, HTTPStatus]:
    """Create manual backups for all cloudsql instances and delete
    manual backups for each instance that are older than _MAX_BACKUP_AGE_DAYS.
    """
    project_id = metadata.project_id()
    logging.info("Starting backup of all cloudsql instances in [%s]", project_id)
    for instance_id in SQLAlchemyEngineManager.get_all_stripped_cloudsql_instance_ids():
        update_long_term_backups_for_cloudsql_instance(project_id, instance_id)

    logging.info("All backup operations completed successfully")
    return "", HTTPStatus.OK
Example #26
def teardown_on_disk_postgresql_database(declarative_base: DeclarativeMeta) -> None:
    """Clears state in an on-disk postgres database for a given schema, for use once a single test has completed. As an
    optimization, does not actually drop tables, just clears them. As a best practice, you should call
    stop_and_clear_on_disk_postgresql_database() once all tests in a test class are complete to actually drop the
    tables.
    """
    if environment.in_gae():
        raise ValueError('Running test-only code in Google App Engine.')

    session = SessionFactory.for_schema_base(declarative_base)
    try:
        for table in reversed(declarative_base.metadata.sorted_tables):
            session.execute(table.delete())
        session.commit()
    except Exception as e:
        session.rollback()
        raise e
    finally:
        session.close()

    SQLAlchemyEngineManager.teardown_engine_for_schema(declarative_base)
Example #27
    def testGetAllStrippedCloudSqlInstanceIds_returnsOnlyConfiguredIds(
            self, mock_secrets):
        # Arrange
        mock_secrets.side_effect = [
            'project:zone:111', 'project:zone:222', 'project:zone:333',
            'project:zone:444'
        ]

        # Act
        ids = SQLAlchemyEngineManager.get_all_stripped_cloudsql_instance_ids()

        # Assert
        assert ids == ['111', '222', '333', '444']
Example #28
    def _for_database(cls, database_key: SQLAlchemyDatabaseKey) -> Session:
        # TODO(#8046): When the above method is deleted, move this into `using_database`
        # directly.
        engine = SQLAlchemyEngineManager.get_engine_for_database(
            database_key=database_key)
        if engine is None:
            raise ValueError(f"No engine set for key [{database_key}]")

        session = Session(bind=engine)
        cls._alter_session_variables(session)
        cls._apply_session_listener_for_schema_base(
            database_key.declarative_meta, session)
        return session
Example #29
def use_in_memory_sqlite_database(declarative_base: DeclarativeMeta) -> None:
    """Creates a new SqlDatabase object used to communicate to a fake in-memory
    sqlite database.

    This includes:
    1. Creates a new in memory sqlite database engine with a shared cache, allowing access from any test thread.
    2. Create all tables in the newly created sqlite database
    3. Bind the global SessionMaker to the new fake database engine

    This will assert if an engine has already been initialized for this schema - you must use
    teardown_in_memory_sqlite_databases() to do post-test cleanup, otherwise subsequent tests will fail. It will also
    assert if you attempt to create connections from multiple threads within the context of a single test - SQLite does
    not handle multi-threading well and will often lock or crash when used in a multi-threading scenario.
    """
    if environment.in_gae():
        raise ValueError('Running test-only code in Google App Engine.')

    def connection_creator() -> sqlite3.Connection:
        global _in_memory_sqlite_connection_thread_ids

        thread_id = threading.get_ident()
        if thread_id in _in_memory_sqlite_connection_thread_ids:
            raise ValueError(
                'Accessing SQLite in-memory database on multiple threads. Either you forgot to call '
                'teardown_in_memory_sqlite_databases() or you should be using a persistent postgres DB.'
            )

        _in_memory_sqlite_connection_thread_ids.add(thread_id)
        connection = sqlite3.connect('file::memory:', uri=True)

        # Configures SQLite to enforce foreign key constraints
        connection.execute('PRAGMA foreign_keys = ON;')

        return connection

    SQLAlchemyEngineManager.init_engine_for_db_instance(
        db_url='sqlite:///:memory:',
        schema_base=declarative_base,
        creator=connection_creator)
Example #30
def teardown_on_disk_postgresql_database(
        declarative_base: DeclarativeMeta) -> None:
    """Clears state in an on-disk postgres database for a given schema, for use once a single test has completed. As an
    optimization, does not actually drop tables, just clears them. As a best practice, you should call
    stop_and_clear_on_disk_postgresql_database() once all tests in a test class are complete to actually drop the
    tables.
    """
    # Ensure all sessions are closed, otherwise the below may hang.
    close_all_sessions()

    session = SessionFactory.for_schema_base(declarative_base)
    try:
        for table in reversed(declarative_base.metadata.sorted_tables):
            session.execute(table.delete())
        session.commit()
    except Exception as e:
        session.rollback()
        raise e
    finally:
        session.close()

    SQLAlchemyEngineManager.teardown_engine_for_schema(declarative_base)