Example #1
def handle_state_dashboard_user_restrictions_file(
        data: Dict[str, Any], _: ContextType) -> Tuple[str, HTTPStatus]:
    """This function is triggered when a file is dropped in a
    `recidiviz-{project_id}-dashboard-user-restrictions/US_XX` bucket.

    If the file matches `dashboard_user_restrictions.csv`, then it makes a request to import the CSV
    to the Cloud SQL `dashboard_user_restrictions` table in the Case Triage schema.

    Once the CSV import finishes, it makes a request to update the Auth0 users with the user restrictions.

    data: A cloud storage object that holds name information and other metadata
    related to the file that was dropped into the bucket.
    _: (google.cloud.functions.Context): Metadata of triggering event.

    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error(
            "No project id set for call to update auth0 users, returning.")
        return "", HTTPStatus.BAD_REQUEST

    filepath = data["name"].split("/")

    # Expected file path structure is US_XX/dashboard_user_restrictions.csv
    if len(filepath) != 2:
        logging.info(
            "Skipping filepath, incorrect number of nested directories: %s",
            filepath)
        return "", HTTPStatus.OK

    region_code, filename = filepath
    csv_file = "dashboard_user_restrictions.csv"

    if filename == csv_file:
        import_user_restrictions_url = (
            _APP_ENGINE_IMPORT_USER_RESTRICTIONS_CSV_TO_SQL_URL.format(
                project_id,
                region_code,
            ))
        logging.info("Calling URL: %s", import_user_restrictions_url)

        # Hit the App Engine endpoint `auth/import_user_restrictions_csv_to_sql`.
        response = make_iap_request(import_user_restrictions_url,
                                    IAP_CLIENT_ID[project_id])
        logging.info(
            "The %s response status is %s",
            import_user_restrictions_url,
            response.status_code,
        )

        if response.status_code == HTTPStatus.OK:
            update_users_url = _APP_ENGINE_UPDATE_AUTH0_USER_METADATA_URL.format(
                project_id, region_code)
            # Hit the App Engine endpoint `auth/update_auth0_user_metadata`.
            response = make_iap_request(update_users_url,
                                        IAP_CLIENT_ID[project_id])
            logging.info("The %s response status is %s", update_users_url,
                         response.status_code)

    return "", HTTPStatus.OK
Example #2
def handle_deliver_emails_for_batch_email_reporting(request: Request) -> None:
    """Cloud function to deliver a batch of generated emails.
    It hits the App Engine endpoint `reporting/deliver_emails_for_batch`. It requires a JSON input containing the
    following keys:
        batch_id: (required) Identifier for this batch
        redirect_address: (optional) An email address to which all emails should
        be sent instead of to their actual recipients.
    Args:
        request: HTTP request payload containing JSON with keys as described above
    Returns:
        Nothing.
    Raises:
        Nothing. All exception raising is handled within the App Engine logic.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error("No project id set, returning")
        return

    request_params = request.get_json()
    if not request_params:
        logging.error("No request params, returning")
        return

    batch_id = request_params.get("batch_id", '')
    redirect_address = request_params.get("redirect_address", '')
    url = _APP_ENGINE_PO_MONTHLY_REPORT_DELIVER_EMAILS_URL.format(project_id, batch_id, redirect_address)

    logging.info("Calling URL: %s", url)
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
Example #3
def normalize_raw_file_path(data: Dict[str, Any],
                            _: ContextType) -> Tuple[str, HTTPStatus]:
    """Cloud functions can be configured to trigger this function on any bucket that is being used as a test bed for
    automatic uploads. This will just rename the incoming files to have a normalized path with a timestamp so
    subsequent uploads do not have naming conflicts."""
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        error_str = (
            "No project id set for call to direct ingest cloud function, returning."
        )
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    bucket = data["bucket"]
    relative_file_path = data["name"]

    url = _DIRECT_INGEST_NORMALIZE_RAW_PATH_URL.format(project_id, bucket,
                                                       relative_file_path)

    logging.info("Calling URL: %s", url)

    # Hit the backend endpoint, which renames the incoming file to a
    # normalized path that includes a timestamp.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
    return "", HTTPStatus(response.status_code)
Example #4
def parse_state_aggregate(data: Dict[str, Any],
                          _: ContextType) -> Tuple[str, HTTPStatus]:
    """This function is triggered when a file is dropped into the state
    aggregate bucket and makes a request to parse and write the data to the
    aggregate table database.

    data: A cloud storage object that holds name information and other metadata
    related to the file that was dropped into the bucket.
    _: (google.cloud.functions.Context): Metadata of triggering event.
    """
    bucket = data["bucket"]
    state, filename = data["name"].split("/")
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    logging.info(
        "Running cloud function for bucket %s, state %s, filename %s",
        bucket,
        state,
        filename,
    )
    url = _STATE_AGGREGATE_CLOUD_FUNCTION_URL.format(project_id, bucket, state,
                                                     filename)
    # Hit the cloud function backend, which persists the table data to our
    # database.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
    return "", HTTPStatus(response.status_code)
Example #5
def export_metric_view_data(event: Dict[str, Any],
                            _context: ContextType) -> Tuple[str, HTTPStatus]:
    """This function is triggered by a Pub/Sub event to begin the export of data contained in BigQuery metric views to
    files in cloud storage buckets.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        error_str = "No project id set for call to export view data, returning."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    if "data" in event:
        logging.info("data found")
        url = (_METRIC_VIEW_EXPORT_CLOUD_FUNCTION_URL.format(project_id) +
               "?export_job_filter=" +
               b64decode(event["data"]).decode("utf-8"))
    else:
        error_str = "Missing required export_job_filter in data of the Pub/Sub message."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    logging.info("project_id: %s", project_id)
    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which exports view data to their assigned cloud storage bucket
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
    return "", HTTPStatus(response.status_code)
Example #6
def trigger_daily_calculation_pipeline_dag(
        data: Dict[str, Any], _context: ContextType) -> Tuple[str, HTTPStatus]:
    """This function is triggered by a Pub/Sub event, triggers an Airflow DAG where all
    the daily calculation pipelines run simultaneously.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY, "")
    if not project_id:
        error_str = (
            "No project id set for call to run the calculation pipelines, returning."
        )
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    iap_client_id = os.environ.get("IAP_CLIENT_ID")
    if not iap_client_id:
        error_str = "The environment variable 'IAP_CLIENT_ID' is not set."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    airflow_uri = os.environ.get("AIRFLOW_URI")
    if not airflow_uri:
        error_str = "The environment variable 'AIRFLOW_URI' is not set"
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST
    # The name of the DAG you wish to trigger
    dag_name = "{}_calculation_pipeline_dag".format(project_id)
    webserver_url = "{}/api/experimental/dags/{}/dag_runs".format(
        airflow_uri, dag_name)

    monitor_response = make_iap_request(webserver_url,
                                        iap_client_id,
                                        method="POST",
                                        json={"conf": data})
    logging.info("The monitoring Airflow response is %s", monitor_response)
    return "", HTTPStatus(monitor_response.status_code)
Example #7
def _handle_state_direct_ingest_file(data,
                                     start_ingest: bool) -> None:
    """Calls direct ingest cloud function when a new file is dropped into a
    bucket."""
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error('No project id set for call to direct ingest cloud '
                      'function, returning.')
        return

    bucket = data['bucket']
    relative_file_path = data['name']
    region_code = get_state_region_code_from_direct_ingest_bucket(bucket)
    if not region_code:
        logging.error('Cannot parse region code from bucket %s, returning.',
                      bucket)
        return

    url = _DIRECT_INGEST_CLOUD_FUNCTION_URL.format(
        project_id, region_code, bucket, relative_file_path, str(start_ingest))

    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which will schedule jobs to parse
    # data for unprocessed files in this bucket and persist to our database.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
Example #8
def _handle_state_direct_ingest_file(
        data: Dict[str, Any], start_ingest: bool) -> Tuple[str, HTTPStatus]:
    """Calls direct ingest cloud function when a new file is dropped into a
    bucket."""
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        error_str = (
            "No project id set for call to direct ingest cloud function, returning."
        )
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    bucket = data["bucket"]
    relative_file_path = data["name"]
    region_code = get_region_code_from_direct_ingest_bucket(bucket)
    if not region_code:
        error_str = f"Cannot parse region code from bucket {bucket}, returning."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    url = _DIRECT_INGEST_CLOUD_FUNCTION_URL.format(project_id, region_code,
                                                   bucket, relative_file_path,
                                                   str(start_ingest))

    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which will schedule jobs to parse
    # data for unprocessed files in this bucket and persist to our database.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
    return "", HTTPStatus(response.status_code)
def make_iap_export_request(url: str) -> Dict[str, Any]:
    client_id = IAP_CLIENT_ID[os.environ.get("GCP_PROJECT_ID")]
    # make_iap_request raises an exception if the returned status code is not 200
    response = make_iap_request(url, client_id)

    # When operators return a value in Airflow, the result is put into XCom for other operators to access.
    # However, the result must be a built-in Python data type, otherwise the operator will not return successfully.
    return {"status_code": response.status_code, "text": response.text}
Example #10
def start_and_monitor_calculation_pipeline(_event, _context) -> None:
    """This function, which is triggered by a Pub/Sub event, can kick off any single Dataflow pipeline template.

    On successful triggering of the job, this function makes a call to the app
    to begin monitoring the progress of the job.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error(
            "No project id set for call to run a calculation" " pipeline, returning."
        )
        return

    bucket = get_dataflow_template_bucket(project_id)

    template_name = os.environ.get("TEMPLATE_NAME")
    if not template_name:
        logging.error("No template_name set, returning.")
        return

    job_name = os.environ.get("JOB_NAME")
    if not job_name:
        logging.error("No job_name set, returning.")
        return

    on_dataflow_job_completion_topic = os.environ.get(
        "ON_DATAFLOW_JOB_COMPLETION_TOPIC"
    )
    if not on_dataflow_job_completion_topic:
        logging.error("No on-completion topic set, returning.")
        return

    region = os.environ.get("REGION")
    if not region:
        logging.error("No region set, returning.")
        return

    response = trigger_dataflow_job_from_template(
        project_id, bucket, template_name, job_name, region
    )

    logging.info("The response to triggering the Dataflow job is: %s", response)

    job_id = response["id"]
    location = response["location"]
    on_dataflow_job_completion_topic = on_dataflow_job_completion_topic.replace(
        ".", "-"
    )

    # Monitor the successfully triggered Dataflow job
    url = _DATAFLOW_MONITOR_URL.format(
        project_id, job_id, location, on_dataflow_job_completion_topic
    )

    monitor_response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The monitoring Dataflow response is %s", monitor_response)
Example #11
def run_calculation_pipelines(_event, _context):
    """This function, which is triggered by a Pub/Sub event, kicks off a
    Dataflow job with the given job_name where the template for the job lives at
    gs://{bucket}/templates/{template_name} for the given project.

    On successful triggering of the job, this function makes a call to the app
    to begin monitoring the progress of the job.
    """
    project_id = os.environ.get('GCP_PROJECT')
    if not project_id:
        logging.error('No project id set for call to run a calculation'
                      ' pipeline, returning.')
        return

    bucket = get_dataflow_template_bucket(project_id)

    template_name = os.environ.get('TEMPLATE_NAME')
    if not template_name:
        logging.error('No template_name set, returning.')
        return

    job_name = os.environ.get('JOB_NAME')
    if not job_name:
        logging.error('No job_name set, returning.')
        return

    on_dataflow_job_completion_topic = os.environ.get(
        'ON_DATAFLOW_JOB_COMPLETION_TOPIC')
    if not on_dataflow_job_completion_topic:
        logging.error('No on-completion topic set, returning.')
        return

    response = trigger_dataflow_job_from_template(project_id, bucket,
                                                  template_name, job_name)

    logging.info("The response to triggering the Dataflow job is: %s",
                 response)

    job_id = response['id']
    location = response['location']
    on_dataflow_job_completion_topic = on_dataflow_job_completion_topic.replace(
        '.', '-')

    # Monitor the successfully triggered Dataflow job
    url = _DATAFLOW_MONITOR_URL.format(project_id, job_id, location,
                                       on_dataflow_job_completion_topic)

    monitor_response = make_iap_request(url, _CLIENT_ID[project_id])
    logging.info("The monitoring Dataflow response is %s", monitor_response)
Example #12
def handle_start_new_batch_email_reporting(request: Request) -> None:
    """Start a new batch of email generation for the indicated state.
    This function is the entry point for generating a new batch. It hits the App Engine endpoint `/start_new_batch`.
    It requires a JSON input containing the following keys:
        state_code: (required) State code for the report (i.e. "US_ID")
        report_type: (required) The type of report (i.e. "po_monthly_report")
        test_address: (optional) A test address to generate emails for
        region_code: (optional) The sub-region of the state to generate emails for (i.e. "US_ID_D5")
        message_body: (optional) If included, overrides the default message body.
    Args:
        request: The HTTP request. Must contain JSON with "state_code" and
        "report_type" keys, and may contain an optional "test_address" key.
    Returns:
        Nothing.
    Raises:
        Nothing. All exception raising is handled within the App Engine logic.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error("No project id set, returning")
        return

    request_params = request.get_json()
    if not request_params:
        logging.error("No request params, returning")
        return

    query_params = build_query_param_string(
        request_params,
        [
            "state_code",
            "report_type",
            "test_address",
            "region_code",
            "message_body",
        ],
    )

    url = _APP_ENGINE_PO_MONTHLY_REPORT_GENERATE_EMAILS_URL.format(
        project_id, query_params
    )

    logging.info("Calling URL: %s", url)

    # Hit the App Engine endpoint `reporting/start_new_batch`.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
Example #13
def export_view_data(_event, _context):
    """This function is triggered by a Pub/Sub event to begin the export of data contained in BigQuery views to files
    in cloud storage buckets.
    """
    project_id = os.environ.get('GCP_PROJECT')
    if not project_id:
        logging.error(
            'No project id set for call to export view data, returning.')
        return
    url = _VIEW_DATA_EXPORT_CLOUD_FUNCTION_URL.format(project_id)

    logging.info("project_id: %s", project_id)
    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which exports view data to their assigned cloud storage bucket
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
Example #14
def _call_dashboard_export(data_type: str):
    project_id = os.environ.get('GCP_PROJECT')
    if not project_id:
        logging.error('No project id set for call to export dashboard data, '
                      'returning.')
        return

    bucket = get_dashboard_data_export_storage_bucket(project_id)

    url = _DASHBOARD_EXPORT_CLOUD_FUNCTION_URL.format(project_id, bucket,
                                                      data_type)
    logging.info("project_id: %s", project_id)
    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which exports the given data type to
    # the given cloud storage bucket
    response = make_iap_request(url, _CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
Example #15
def trigger_calculation_pipeline_dag(data, _context) -> None:
    """This function is triggered by a Pub/Sub event, triggers an Airflow DAG where all
    the calculation pipelines run simultaneously.
    """
    gcp_project_id = os.environ.get(GCP_PROJECT_ID_KEY, '')
    if not gcp_project_id:
        logging.error('No project id set for call to run the calculation pipelines, returning.')
        return
    project_id = gcp_project_id + '-airflow'

    webserver_id = os.environ.get('WEBSERVER_ID')
    if not webserver_id:
        logging.error("The environment variable 'WEBSERVER_ID' is not set")
        return
    # The name of the DAG you wish to trigger
    dag_name = '{}_calculation_pipeline_dag'.format(gcp_project_id)
    webserver_url = 'https://{}.appspot.com/api/experimental/dags/{}/dag_runs'.format(webserver_id, dag_name)

    monitor_response = make_iap_request(webserver_url, IAP_CLIENT_ID[project_id], method='POST', json={"conf": data})
    logging.info("The monitoring Airflow response is %s", monitor_response)
Example #16
def export_dashboard_data(_event, _context):
    """This function is triggered by a Pub/Sub event to begin the export of
    data needed for the dashboard.
    """
    project_id = os.environ.get('GCP_PROJECT')
    if not project_id:
        logging.error('No project id set for call to export dashboard data, '
                      'returning.')
        return

    bucket = get_dashboard_data_export_storage_bucket(project_id)

    url = _DASHBOARD_EXPORT_CLOUD_FUNCTION_URL.format(project_id, bucket)
    logging.info("project_id: %s", project_id)
    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which exports the given data type to
    # the given cloud storage bucket
    response = make_iap_request(url, _CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
Example #17
def handle_new_case_triage_etl(data: Dict[str, Any],
                               _: ContextType) -> Tuple[str, HTTPStatus]:
    """This function is triggered when a file is dropped in the
    `{project_id}-case-triage-data` bucket. If the file matches `etl_*.csv`,
    then it makes a request to import the CSV to Cloud SQL.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error(
            "No project id set for call to import Case Triage ETL, returning.")
        return "", HTTPStatus.BAD_REQUEST

    filename = data["name"]
    if not filename.startswith("etl_") or not filename.endswith(".csv"):
        logging.info("Ignoring file %s", filename)
        return "", HTTPStatus.OK

    import_url = _APP_ENGINE_IMPORT_CASE_TRIAGE_ETL_CSV_TO_SQL_URL.format(
        project_id, filename)
    import_response = make_iap_request(import_url, IAP_CLIENT_ID[project_id])
    return "", HTTPStatus(import_response.status_code)
Example #18
def export_metric_view_data(event, _context) -> None:
    """This function is triggered by a Pub/Sub event to begin the export of data contained in BigQuery metric views to
    files in cloud storage buckets.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error('No project id set for call to export view data, returning.')
        return

    if 'data' in event:
        logging.info("data found")
        url = (_METRIC_VIEW_EXPORT_CLOUD_FUNCTION_URL.format(project_id) +
               '?export_job_filter=' + b64decode(event['data']).decode('utf-8'))
    else:
        url = _METRIC_VIEW_EXPORT_CLOUD_FUNCTION_URL.format(project_id)

    logging.info("project_id: %s", project_id)
    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which exports view data to their assigned cloud storage bucket
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)