#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# =============================================================================
"""A test view builder file for big_query_view_collector_test.py"""
from recidiviz.big_query.big_query_view import BigQueryView
from recidiviz.tests.big_query.fake_big_query_view_builder import (
    FakeBigQueryViewBuilder,
)
from recidiviz.utils.metadata import local_project_id_override

with local_project_id_override("my-project-id"):
    GOOD_VIEW_1 = BigQueryView(
        dataset_id="my_dataset",
        view_id="early_discharge_incarceration_sentence",
        description="early_discharge_incarceration_sentence description",
        view_query_template="SELECT * FROM table1",
    )

VIEW_BUILDER = FakeBigQueryViewBuilder(GOOD_VIEW_1)
"""The run dates to use for the simulation validation""" # pylint: disable=trailing-whitespace from recidiviz.big_query.big_query_view import SimpleBigQueryViewBuilder from recidiviz.calculator.query.state import dataset_config from recidiviz.utils.environment import GCP_PROJECT_STAGING from recidiviz.utils.metadata import local_project_id_override SIMULATION_RUN_DATES_VIEW_NAME = 'simulation_run_dates' SIMULATION_RUN_DATES_VIEW_DESCRIPTION = \ """"All of the run dates to use for validating the simulation""" SIMULATION_RUN_DATES_QUERY_TEMPLATE = \ """ SELECT * FROM UNNEST(GENERATE_DATE_ARRAY('2018-01-01', DATE_TRUNC(CURRENT_DATE, MONTH), INTERVAL 1 MONTH)) AS run_date """ SIMULATION_RUN_DATES_VIEW_BUILDER = SimpleBigQueryViewBuilder( dataset_id=dataset_config.POPULATION_PROJECTION_DATASET, view_id=SIMULATION_RUN_DATES_VIEW_NAME, view_query_template=SIMULATION_RUN_DATES_QUERY_TEMPLATE, description=SIMULATION_RUN_DATES_VIEW_DESCRIPTION, should_materialize=False ) if __name__ == '__main__': with local_project_id_override(GCP_PROJECT_STAGING): SIMULATION_RUN_DATES_VIEW_BUILDER.build_and_print()
            bq_client.update_schema(
                dataflow_metrics_dataset_id, table_id, schema_for_metric_class
            )
        else:
            # Create a table with this schema
            bq_client.create_table_with_schema(
                dataflow_metrics_dataset_id, table_id, schema_for_metric_class
            )


def parse_arguments(argv: List[str]) -> Tuple[argparse.Namespace, List[str]]:
    """Parses the arguments needed to call the desired function."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--project_id",
        dest="project_id",
        type=str,
        choices=[GCP_PROJECT_STAGING, GCP_PROJECT_PRODUCTION],
        required=True,
    )

    return parser.parse_known_args(argv)


if __name__ == "__main__":
    logging.getLogger().setLevel(logging.INFO)
    known_args, _ = parse_arguments(sys.argv)

    with local_project_id_override(known_args.project_id):
        update_dataflow_metric_tables_schemas()
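# A hypothetical invocation of the schema-update script above (the module path
# is an assumption for illustration, not taken from the source; --project_id
# must be one of the two configured choices, GCP_PROJECT_STAGING or
# GCP_PROJECT_PRODUCTION):
#
#   python -m recidiviz.calculator.update_dataflow_metric_tables_schemas \
#       --project_id <staging-or-production-project-id>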
    def test_initialize_queues(self, mock_regions):
        # Arrange
        region_xx = fake_region(
            region_code="us_xx",
            queue={"rate_limits": {"max_dispatches_per_second": 0.3}},
        )
        region_xx.get_queue_name.return_value = "us_xx_queue"
        region_yy = fake_region(region_code="us_yy")
        region_yy.get_queue_name.return_value = "us_yy_queue"
        mock_regions.return_value = [region_xx, region_yy]

        # Act
        with metadata.local_project_id_override("my-project-id"):
            google_cloud_task_queue_config.initialize_queues(
                google_auth_token="fake-auth-token"
            )

        # Assert
        queues_updated_by_id = self.get_updated_queues()
        for queue in queues_updated_by_id.values():
            self.assertTrue(
                queue.name.startswith(
                    "projects/my-project-id/locations/us-east1/queues/"
                )
            )
            self.assertEqual(queue.stackdriver_logging_config.sampling_ratio, 1.0)

        direct_ingest_queue_ids = {
            "direct-ingest-state-process-job-queue-v2",
            "direct-ingest-jpp-process-job-queue-v2",
            "direct-ingest-bq-import-export-v2",
            "direct-ingest-scheduler-v2",
        }
        self.assertFalse(
            direct_ingest_queue_ids.difference(queues_updated_by_id.keys())
        )

        for queue_id in direct_ingest_queue_ids:
            queue = queues_updated_by_id[queue_id]
            self.assertEqual(queue.rate_limits.max_concurrent_dispatches, 1)

        # Test that composition works as expected
        self.assertEqual(
            queues_updated_by_id[region_xx.get_queue_name()],
            queue_pb2.Queue(
                name="projects/my-project-id/locations/us-east1/queues/us_xx_queue",
                rate_limits=queue_pb2.RateLimits(
                    # This is overridden in the mock above
                    max_dispatches_per_second=0.3,
                    max_concurrent_dispatches=3,
                ),
                retry_config=queue_pb2.RetryConfig(
                    min_backoff=duration_pb2.Duration(seconds=5),
                    max_backoff=duration_pb2.Duration(seconds=300),
                    max_attempts=5,
                ),
                stackdriver_logging_config=queue_pb2.StackdriverLoggingConfig(
                    sampling_ratio=1.0,
                ),
            ),
        )

        # Test that other regions are unaffected
        self.assertEqual(
            queues_updated_by_id[region_yy.get_queue_name()],
            queue_pb2.Queue(
                name="projects/my-project-id/locations/us-east1/queues/us_yy_queue",
                rate_limits=queue_pb2.RateLimits(
                    max_dispatches_per_second=0.08333333333,
                    max_concurrent_dispatches=3,
                ),
                retry_config=queue_pb2.RetryConfig(
                    min_backoff=duration_pb2.Duration(seconds=5),
                    max_backoff=duration_pb2.Duration(seconds=300),
                    max_attempts=5,
                ),
                stackdriver_logging_config=queue_pb2.StackdriverLoggingConfig(
                    sampling_ratio=1.0,
                ),
            ),
        )

        self.assertTrue("bigquery-v2" in queues_updated_by_id)
        self.assertTrue("job-monitor-v2" in queues_updated_by_id)
        self.assertTrue("scraper-phase-v2" in queues_updated_by_id)
    }

    for validation_job in failed_to_run_validations:
        logging.error("Failed to run data validation job: %s", validation_job)

        monitoring_tags = tags_for_job(validation_job)
        with monitoring.measurements(monitoring_tags) as measurements:
            measurements.measure_int_put(m_failed_to_run_validations, 1)

    for result in failed_validations:
        logging.error("Failed data validation: %s", result)

        monitoring_tags = tags_for_job(result.validation_job)
        with monitoring.measurements(monitoring_tags) as measurements:
            measurements.measure_int_put(m_failed_validations, 1)


def _readable_response(failed_validations: List[DataValidationJobResult]) -> str:
    readable_output = "\n".join([f.__str__() for f in failed_validations])
    return f"Failed validations:\n{readable_output}"


if __name__ == "__main__":
    # This will run validations for all regions against data in the given project,
    # regardless of whether the region is officially launched in that environment.
    project_id = GCP_PROJECT_STAGING

    logging.getLogger().setLevel(logging.INFO)
    with local_project_id_override(project_id):
        execute_validation(rematerialize_views=True, region_code_filter=None)
# =============================================================================
"""Script run on deploy that initializes all task queues with appropriate
configurations."""
import argparse
import logging

from recidiviz.utils import metadata
from recidiviz.common.google_cloud import google_cloud_task_queue_config

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--project_id", required=True, help="Project to initialize queues for"
    )
    parser.add_argument(
        "--google_auth_token",
        required=True,
        help="Auth token (obtained via `gcloud auth print-access-token`).",
    )
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(message)s")

    with metadata.local_project_id_override(args.project_id):
        google_cloud_task_queue_config.initialize_queues(
            google_auth_token=args.google_auth_token,
        )
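# A hypothetical deploy-time invocation of the queue-initialization script
# above (the module path is assumed for illustration; the auth token is
# obtained the way the --google_auth_token help text describes):
#
#   python -m recidiviz.tools.initialize_queues \
#       --project_id <project-id> \
#       --google_auth_token "$(gcloud auth print-access-token)"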
        logging.warning(
            "Until readonly users are created, we cannot autogenerate migrations against staging."
        )
        logging.warning("See https://github.com/Recidiviz/zenhub-tasks/issues/134")
        sys.exit(1)

    try:
        config = alembic.config.Config(
            SQLAlchemyEngineManager.get_alembic_file(database)
        )
        if use_local_db:
            upgrade(config, "head")
        revision(config, autogenerate=True, message=message)
    except Exception as e:
        logging.error("Automigration generation failed: %s", e)

    local_postgres_helpers.restore_local_env_vars(original_env_vars)
    if use_local_db:
        logging.info("Stopping local postgres database...")
        local_postgres_helpers.stop_and_clear_on_disk_postgresql_database(tmp_db_dir)


if __name__ == "__main__":
    args = create_parser().parse_args()
    if not args.project_id:
        main(args.database, args.message, True)
    else:
        with local_project_id_override(args.project_id):
            main(args.database, args.message, False)
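# A hypothetical invocation of the migration autogeneration script above
# (module path and argument values are placeholders for illustration; when
# --project_id is omitted, the script runs against a temporary local postgres
# database, as in the `main(..., True)` branch):
#
#   python -m recidiviz.tools.migrations.autogenerate_migration \
#       --database <database-key> \
#       --message "<migration message>"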