Example #1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# =============================================================================
"""A test view builder file for big_query_view_collector_test.py"""

from recidiviz.big_query.big_query_view import BigQueryView
from recidiviz.tests.big_query.fake_big_query_view_builder import (
    FakeBigQueryViewBuilder, )
from recidiviz.utils.metadata import local_project_id_override

with local_project_id_override("my-project-id"):
    GOOD_VIEW_1 = BigQueryView(
        dataset_id="my_dataset",
        view_id="early_discharge_incarceration_sentence",
        description="early_discharge_incarceration_sentence description",
        view_query_template="SELECT * FROM table1",
    )

VIEW_BUILDER = FakeBigQueryViewBuilder(GOOD_VIEW_1)
"""The run dates to use for the simulation validation"""
# pylint: disable=trailing-whitespace
from recidiviz.big_query.big_query_view import SimpleBigQueryViewBuilder
from recidiviz.calculator.query.state import dataset_config
from recidiviz.utils.environment import GCP_PROJECT_STAGING
from recidiviz.utils.metadata import local_project_id_override

SIMULATION_RUN_DATES_VIEW_NAME = 'simulation_run_dates'

SIMULATION_RUN_DATES_VIEW_DESCRIPTION = \
    """"All of the run dates to use for validating the simulation"""

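# The query below uses BigQuery's GENERATE_DATE_ARRAY to produce one row per month:
# the first day of every month from 2018-01-01 through the current month.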
SIMULATION_RUN_DATES_QUERY_TEMPLATE = \
    """
    SELECT *
    FROM
    UNNEST(GENERATE_DATE_ARRAY('2018-01-01', DATE_TRUNC(CURRENT_DATE, MONTH), INTERVAL 1 MONTH)) AS run_date
    """

SIMULATION_RUN_DATES_VIEW_BUILDER = SimpleBigQueryViewBuilder(
    dataset_id=dataset_config.POPULATION_PROJECTION_DATASET,
    view_id=SIMULATION_RUN_DATES_VIEW_NAME,
    view_query_template=SIMULATION_RUN_DATES_QUERY_TEMPLATE,
    description=SIMULATION_RUN_DATES_VIEW_DESCRIPTION,
    should_materialize=False
)

if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        SIMULATION_RUN_DATES_VIEW_BUILDER.build_and_print()
Example #3
        # Guard assumed for this excerpt: update the schema when the metric
        # table already exists, otherwise create it.
        if bq_client.table_exists(dataflow_metrics_dataset_id, table_id):
            bq_client.update_schema(dataflow_metrics_dataset_id, table_id,
                                    schema_for_metric_class)
        else:
            # Create a table with this schema
            bq_client.create_table_with_schema(dataflow_metrics_dataset_id,
                                               table_id,
                                               schema_for_metric_class)


def parse_arguments(argv: List[str]) -> Tuple[argparse.Namespace, List[str]]:
    """Parses the arguments needed to call the desired function."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--project_id",
        dest="project_id",
        type=str,
        choices=[GCP_PROJECT_STAGING, GCP_PROJECT_PRODUCTION],
        required=True,
    )

    return parser.parse_known_args(argv)


if __name__ == "__main__":
    logging.getLogger().setLevel(logging.INFO)
    known_args, _ = parse_arguments(sys.argv)

    with local_project_id_override(known_args.project_id):
        update_dataflow_metric_tables_schemas()
Example #4
    def test_initialize_queues(self, mock_regions):
        # Arrange
        region_xx = fake_region(
            region_code="us_xx",
            queue={"rate_limits": {
                "max_dispatches_per_second": 0.3
            }},
        )
        region_xx.get_queue_name.return_value = "us_xx_queue"
        region_yy = fake_region(region_code="us_yy")
        region_yy.get_queue_name.return_value = "us_yy_queue"
        mock_regions.return_value = [region_xx, region_yy]

        # Act
        with metadata.local_project_id_override("my-project-id"):
            google_cloud_task_queue_config.initialize_queues(
                google_auth_token="fake-auth-token")

        # Assert
        queues_updated_by_id = self.get_updated_queues()
        for queue in queues_updated_by_id.values():
            self.assertTrue(
                queue.name.startswith(
                    "projects/my-project-id/locations/us-east1/queues/"))
            self.assertEqual(queue.stackdriver_logging_config.sampling_ratio,
                             1.0)

        direct_ingest_queue_ids = {
            "direct-ingest-state-process-job-queue-v2",
            "direct-ingest-jpp-process-job-queue-v2",
            "direct-ingest-bq-import-export-v2",
            "direct-ingest-scheduler-v2",
        }
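        # An empty set difference means every one of these direct ingest queue
        # ids was among the queues that were updated.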
        self.assertFalse(
            direct_ingest_queue_ids.difference(queues_updated_by_id.keys()))

        for queue_id in direct_ingest_queue_ids:
            queue = queues_updated_by_id[queue_id]
            self.assertEqual(queue.rate_limits.max_concurrent_dispatches, 1)

        # Test that the region-specific rate limit override is composed with
        # the default queue configuration
        self.assertEqual(
            queues_updated_by_id[region_xx.get_queue_name()],
            queue_pb2.Queue(
                name=
                "projects/my-project-id/locations/us-east1/queues/us_xx_queue",
                rate_limits=queue_pb2.RateLimits(
                    # This is overridden in the mock above
                    max_dispatches_per_second=0.3,
                    max_concurrent_dispatches=3,
                ),
                retry_config=queue_pb2.RetryConfig(
                    min_backoff=duration_pb2.Duration(seconds=5),
                    max_backoff=duration_pb2.Duration(seconds=300),
                    max_attempts=5,
                ),
                stackdriver_logging_config=queue_pb2.StackdriverLoggingConfig(
                    sampling_ratio=1.0, ),
            ),
        )

        # Test that other regions are unaffected
        self.assertEqual(
            queues_updated_by_id[region_yy.get_queue_name()],
            queue_pb2.Queue(
                name=
                "projects/my-project-id/locations/us-east1/queues/us_yy_queue",
                rate_limits=queue_pb2.RateLimits(
                    max_dispatches_per_second=0.08333333333,
                    max_concurrent_dispatches=3,
                ),
                retry_config=queue_pb2.RetryConfig(
                    min_backoff=duration_pb2.Duration(seconds=5),
                    max_backoff=duration_pb2.Duration(seconds=300),
                    max_attempts=5,
                ),
                stackdriver_logging_config=queue_pb2.StackdriverLoggingConfig(
                    sampling_ratio=1.0, ),
            ),
        )

        self.assertTrue("bigquery-v2" in queues_updated_by_id)
        self.assertTrue("job-monitor-v2" in queues_updated_by_id)
        self.assertTrue("scraper-phase-v2" in queues_updated_by_id)
Example #5
        }

    for validation_job in failed_to_run_validations:
        logging.error("Failed to run data validation job: %s", validation_job)

        monitoring_tags = tags_for_job(validation_job)
        with monitoring.measurements(monitoring_tags) as measurements:
            measurements.measure_int_put(m_failed_to_run_validations, 1)

    for result in failed_validations:
        logging.error("Failed data validation: %s", result)

        monitoring_tags = tags_for_job(result.validation_job)
        with monitoring.measurements(monitoring_tags) as measurements:
            measurements.measure_int_put(m_failed_validations, 1)


def _readable_response(
        failed_validations: List[DataValidationJobResult]) -> str:
    readable_output = "\n".join(str(f) for f in failed_validations)
    return f"Failed validations:\n{readable_output}"


if __name__ == "__main__":
    # This will run validations for all regions against data in the given project, regardless of whether the region is
    # officially launched in that environment.
    project_id = GCP_PROJECT_STAGING
    logging.getLogger().setLevel(logging.INFO)
    with local_project_id_override(project_id):
        execute_validation(rematerialize_views=True, region_code_filter=None)
Example #6
# =============================================================================
"""Script run on deploy that initializes all task queues with appropriate
configurations."""

import argparse
import logging
from recidiviz.utils import metadata

from recidiviz.common.google_cloud import google_cloud_task_queue_config

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--project_id",
                        required=True,
                        help="Project to initialize queues for")
    parser.add_argument(
        "--google_auth_token",
        required=True,
        help="Auth token (obtained via "
        "`gcloud auth print-access-token`).",
    )

    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(message)s")

    with metadata.local_project_id_override(args.project_id):
        google_cloud_task_queue_config.initialize_queues(
            google_auth_token=args.google_auth_token, )
Example #7
            logging.warning(
                "Until readonly users are created, we cannot autogenerate migrations against staging."
            )
            logging.warning(
                "See https://github.com/Recidiviz/zenhub-tasks/issues/134")
            sys.exit(1)

    try:
        config = alembic.config.Config(
            SQLAlchemyEngineManager.get_alembic_file(database))
        if use_local_db:
            upgrade(config, "head")
        revision(config, autogenerate=True, message=message)
    except Exception as e:
        logging.error("Automigration generation failed: %s", e)

    local_postgres_helpers.restore_local_env_vars(original_env_vars)
    if use_local_db:
        logging.info("Stopping local postgres database...")
        local_postgres_helpers.stop_and_clear_on_disk_postgresql_database(
            tmp_db_dir)


if __name__ == "__main__":
    args = create_parser().parse_args()
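    # With no project id, run against a local throwaway Postgres instance
    # (use_local_db=True); otherwise run against the specified project.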
    if not args.project_id:
        main(args.database, args.message, True)
    else:
        with local_project_id_override(args.project_id):
            main(args.database, args.message, False)