def upload_from_sftp() -> Tuple[str, HTTPStatus]:
    """Connects to remote SFTP servers and uploads the files in both raw and normalized form
    to GCS buckets to start the ingest process. Should only be called from a task queue scheduler.

    Args:
        region_code (Optional[str]): required as part of the request to identify the region
        date_str (Optional[str]): ISO format date string,
            used to determine the lower bound date from which to start
            pulling items from the SFTP server. If None, uses yesterday as the default
            lower bound time; otherwise creates a datetime from the string.
        bucket_str (Optional[str]): GCS bucket name, used to override the
            destination to which the SFTP assets are downloaded and moved for proper
            ingest (therefore used in both controllers). If None, uses the bucket
            determined by |region_code|; otherwise, uses this destination.
    """
    logging.info("Received request for uploading files from SFTP: %s",
                 request.values)
    region_code = get_str_param_value("region", request.values)
    date_str = get_str_param_value("date", request.values)
    bucket_str = get_str_param_value("bucket", request.values)

    if not region_code:
        return f"Bad parameters [{request.values}]", HTTPStatus.BAD_REQUEST

    with monitoring.push_region_tag(region_code):
        lower_bound_update_datetime = (
            datetime.datetime.fromisoformat(date_str) if date_str is not None
            else datetime.datetime.utcnow() - datetime.timedelta(1))
        sftp_controller = DownloadFilesFromSftpController(
            project_id=metadata.project_id(),
            region=region_code,
            lower_bound_update_datetime=lower_bound_update_datetime,
            gcs_destination_path=bucket_str,
        )
        downloaded_items, unable_to_download_items = sftp_controller.do_fetch()

        if downloaded_items:
            _, unable_to_upload_files = UploadStateFilesToIngestBucketController(
                paths_with_timestamps=downloaded_items,
                project_id=metadata.project_id(),
                region=region_code,
                gcs_destination_path=bucket_str,
            ).do_upload()

            sftp_controller.clean_up()

            if unable_to_download_items or unable_to_upload_files:
                return (
                    f"Unable to download the following files: {unable_to_download_items}, "
                    f"and upload the following files: {unable_to_upload_files}",
                    HTTPStatus.MULTI_STATUS,
                )
        elif unable_to_download_items:
            return (
                f"Unable to download the following files {unable_to_download_items}",
                HTTPStatus.MULTI_STATUS,
            )
        elif not downloaded_items and not unable_to_download_items:
            return f"No items to download for {region_code}", HTTPStatus.MULTI_STATUS
    return "", HTTPStatus.OK
    def database_version(
        # TODO(#7984): Remove the state_code arg once all states have been migrated to
        #   multi-DB.
        self,
        system_level: SystemLevel,
        state_code: Optional[StateCode],
    ) -> SQLAlchemyStateDatabaseVersion:
        """Return the database version for this instance."""
        self.check_is_valid_system_level(system_level)

        if system_level == SystemLevel.COUNTY:
            # County direct ingest writes to single, multi-tenant database
            return SQLAlchemyStateDatabaseVersion.LEGACY

        if system_level == SystemLevel.STATE:
            if not state_code:
                raise ValueError("Found null state_code for STATE schema.")
            if self == self.SECONDARY:
                return SQLAlchemyStateDatabaseVersion.SECONDARY
            if self == self.PRIMARY:
                # TODO(#7984): Switch this to SQLAlchemyStateDatabaseVersion.PRIMARY
                #  once all states have been migrated to multi-DB.
                if metadata.project_id(
                ) not in STATE_TO_PRIMARY_DATABASE_VERSION:
                    if not environment.in_test():
                        raise ValueError(
                            f"Unexpected project id {metadata.project_id()}")
                    return SQLAlchemyStateDatabaseVersion.LEGACY

                return STATE_TO_PRIMARY_DATABASE_VERSION[
                    metadata.project_id()][state_code]

        raise ValueError(
            f"Unexpected combination of [{system_level}] and instance type [{self}]"
        )
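
# Hedged sketch of the nested-mapping shape implied by the double lookup above
# (project id -> state code -> database version). The project name, state codes, and
# enum below are placeholders, not the real STATE_TO_PRIMARY_DATABASE_VERSION contents.
from enum import Enum


class _SketchDatabaseVersion(Enum):
    LEGACY = "legacy"
    PRIMARY = "primary"


_SKETCH_STATE_TO_PRIMARY_DATABASE_VERSION = {
    "some-project-id": {
        "US_XX": _SketchDatabaseVersion.PRIMARY,
        "US_YY": _SketchDatabaseVersion.LEGACY,
    },
}

assert (
    _SKETCH_STATE_TO_PRIMARY_DATABASE_VERSION["some-project-id"]["US_XX"]
    is _SketchDatabaseVersion.PRIMARY
)
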
Example #3
    def test_local_project_id_override_throws_if_called_nested(self) -> None:
        original_project = metadata.project_id()

        with local_project_id_override("recidiviz-456"):
            self.assertEqual("recidiviz-456", metadata.project_id())
            with self.assertRaises(ValueError):
                with local_project_id_override("recidiviz-678"):
                    pass
            self.assertEqual("recidiviz-456", metadata.project_id())

        self.assertEqual(original_project, metadata.project_id())
Example #4
    def test_local_project_id_override(self):
        original_project = metadata.project_id()

        with local_project_id_override('recidiviz-456'):
            self.assertEqual('recidiviz-456', metadata.project_id())

        self.assertEqual(original_project, metadata.project_id())

        with local_project_id_override('recidiviz-678'):
            self.assertEqual('recidiviz-678', metadata.project_id())

        self.assertEqual(original_project, metadata.project_id())
Example #5
    def test_local_project_id_override(self) -> None:
        original_project = metadata.project_id()

        with local_project_id_override("recidiviz-456"):
            self.assertEqual("recidiviz-456", metadata.project_id())

        self.assertEqual(original_project, metadata.project_id())

        with local_project_id_override("recidiviz-678"):
            self.assertEqual("recidiviz-678", metadata.project_id())

        self.assertEqual(original_project, metadata.project_id())
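
# The three tests above exercise a context manager that temporarily overrides the
# project id and refuses to nest. A stdlib-only sketch of that pattern (illustrative;
# the real local_project_id_override lives in the metadata module and may differ):
from contextlib import contextmanager
from typing import Iterator, Optional

_sketch_override: Optional[str] = None


@contextmanager
def _sketch_local_project_id_override(project_id: str) -> Iterator[None]:
    global _sketch_override
    if _sketch_override is not None:
        raise ValueError("Project id override is already set")
    _sketch_override = project_id
    try:
        yield
    finally:
        _sketch_override = None
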
def update_raw_data_latest_views_for_state() -> Tuple[str, HTTPStatus]:
    """Updates raw data tables for a given state"""
    logging.info("Received request to do direct ingest raw data update: [%s]",
                 request.values)
    region_code = get_str_param_value("region", request.values)

    if not region_code:
        return f"Bad parameters [{request.values}]", HTTPStatus.BAD_REQUEST

    with monitoring.push_region_tag(region_code):
        bq_client = BigQueryClientImpl(project_id=metadata.project_id())
        controller = DirectIngestRawDataTableLatestViewUpdater(
            region_code, metadata.project_id(), bq_client)
        controller.update_views_for_state()
    return "", HTTPStatus.OK
Example #7
    def test_create_scrape_task(self, mock_client):
        """Tests that a task is created."""
        url = '/test/work'
        queue_name = 'testqueue'
        params = {'a': 'hello'}
        queue_path = queue_name + '-path'
        mock_client.return_value.queue_path.return_value = queue_path
        task_path = queue_path + '/us_ny-12345'
        mock_client.return_value.task_path.return_value = task_path

        queues.create_scrape_task(
            region_code='us_ny', queue_name=queue_name, url=url, body=params)

        body_encoded = json.dumps(params).encode()
        task = tasks.types.Task(
            name=task_path,
            app_engine_http_request={
                'relative_uri': url,
                'body': body_encoded
            }
        )

        mock_client.return_value.queue_path.assert_called_with(
            metadata.project_id(), metadata.region(), queue_name)
        mock_client.return_value.create_task.assert_called_with(
            queue_path, task)
Example #8
    def test_create_bq_task(self, mock_client, mock_uuid, mock_datetime):
        """Tests that a BQ export task is created."""
        url = '/test/bq'
        queue_name = queues.BIGQUERY_QUEUE
        table_name = 'test_table'
        module = 'test_module'
        uuid = 'random-uuid'
        date = '1900-01-01'
        queue_path = queue_name + '-path'
        mock_uuid.uuid4.return_value = uuid
        mock_datetime.date.today.return_value = date
        mock_client.return_value.queue_path.return_value = queue_path
        task_path = queue_path + '/{}-{}-{}-{}'.format(
            table_name, module, date, uuid)
        mock_client.return_value.task_path.return_value = task_path

        queues.create_bq_task(
            table_name=table_name, url=url, module=module)

        params = {'table_name': table_name, 'module': module}
        body_encoded = json.dumps(params).encode()

        task = tasks.types.Task(
            name=task_path,
            app_engine_http_request={
                'relative_uri': url,
                'body': body_encoded
            }
        )

        mock_client.return_value.queue_path.assert_called_with(
            metadata.project_id(), metadata.region(), queue_name)
        mock_client.return_value.create_task.assert_called_with(
            queue_path, task)
    def __init__(
        self,
        *,
        region: Region,
        fs: DirectIngestGCSFileSystem,
        ingest_directory_path: GcsfsDirectoryPath,
        temp_output_directory_path: GcsfsDirectoryPath,
        big_query_client: BigQueryClient,
        region_raw_file_config: Optional[
            DirectIngestRegionRawFileConfig] = None,
        upload_chunk_size: int = _DEFAULT_BQ_UPLOAD_CHUNK_SIZE,
    ):

        self.region = region
        self.fs = fs
        self.ingest_directory_path = ingest_directory_path
        self.temp_output_directory_path = temp_output_directory_path
        self.big_query_client = big_query_client
        self.region_raw_file_config = (
            region_raw_file_config
            if region_raw_file_config else DirectIngestRegionRawFileConfig(
                region_code=self.region.region_code,
                region_module=self.region.region_module,
            ))
        self.upload_chunk_size = upload_chunk_size
        self.csv_reader = GcsfsCsvReader(
            gcsfs.GCSFileSystem(project=metadata.project_id(),
                                cache_timeout=GCSFS_NO_CACHING))
        self.raw_table_migrations = DirectIngestRawTableMigrationCollector(
            region_code=self.region.region_code,
            regions_module_override=self.region.region_module,
        ).collect_raw_table_migration_queries()
Example #10
def get_importable_csvs(
    override_project_id: Optional[str] = None,
) -> Dict[str, GcsfsFilePath]:
    """Returns a map from view ids to GcsfsFilePaths where the views have
    been exported to CSVs."""
    gcsfs = GcsfsFactory.build()
    project_id = (
        metadata.project_id() if not override_project_id else override_project_id
    )

    files_in_import_folder = [
        f
        for f in gcsfs.ls_with_blob_prefix(
            bucket_name=f"{project_id}-case-triage-data",
            blob_prefix="to_import",
        )
        if isinstance(f, GcsfsFilePath)
    ]

    importable_csvs = {}

    for f in files_in_import_folder:
        root, ext = os.path.splitext(f.file_name)
        if ext != ".csv":
            continue
        importable_csvs[root] = f

    return importable_csvs
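
# Stdlib-only sketch of the filtering step above: keep only ".csv" files and key the
# result by the file name with its extension stripped. File names are invented.
import os


def _sketch_importable_csvs(file_names):
    importable = {}
    for name in file_names:
        root, ext = os.path.splitext(name)
        if ext != ".csv":
            continue
        importable[root] = name
    return importable


assert _sketch_importable_csvs(["etl_clients.csv", "README.md"]) == {
    "etl_clients": "etl_clients.csv"
}
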
Example #11
def _run_gcs_imports() -> Tuple[str, HTTPStatus]:
    """Exposes an endpoint to trigger standard GCS imports."""
    body = get_cloud_task_json_body()
    filename = body.get("filename")
    if not filename:
        return "Must include `filename` in the json payload", HTTPStatus.BAD_REQUEST
    for builder in CASE_TRIAGE_EXPORTED_VIEW_BUILDERS:
        if f"{builder.view_id}.csv" != filename:
            continue

        csv_path = GcsfsFilePath.from_absolute_path(
            os.path.join(
                CASE_TRIAGE_VIEWS_OUTPUT_DIRECTORY_URI.format(
                    project_id=metadata.project_id()),
                filename,
            ))

        import_gcs_csv_to_cloud_sql(
            SchemaType.CASE_TRIAGE,
            builder.view_id,
            csv_path,
            builder.columns,
            seconds_to_wait=180,
        )
        logging.info("View (%s) successfully imported", builder.view_id)

    return "", HTTPStatus.OK
Example #12
    def get_unioned_table_view_query_format_string(
            self, state_codes: List[StateCode],
            table: Table) -> Tuple[str, Dict[str, str]]:
        """Returns a tuple (query format string, kwargs) for a view query that unions
        table data from each of the state-segmented datasets into a single table.
        """

        state_select_queries = []
        kwargs = {}
        for state_code in state_codes:
            address = self.materialized_address_for_segment_table(
                table=table, state_code=state_code)
            dataset_key = f"{state_code.value.lower()}_specific_dataset"
            table_key = f"{state_code.value.lower()}_specific_table_id"
            kwargs[dataset_key] = address.dataset_id
            kwargs[table_key] = address.table_id

            bq_query_builder = BigQuerySchemaTableRegionFilteredQueryBuilder(
                project_id=metadata.project_id(),
                dataset_id=address.dataset_id,
                table=table,
                schema_type=self.schema_type,
                columns_to_include=self._get_table_columns_to_export(table),
                region_codes_to_include=[state_code.value.upper()],
                region_codes_to_exclude=None,
            )

            state_select_queries.append(
                f"{bq_query_builder.select_clause()} "
                f"FROM `{{project_id}}.{{{dataset_key}}}.{address.table_id}` {address.table_id}"
            )
        table_union_query = "\nUNION ALL\n".join(state_select_queries)
        return table_union_query, kwargs
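
# The method above returns a query *format string* plus per-state kwargs. A stdlib-only
# illustration of how such a pair is rendered; the query text, dataset names, and table
# below are invented for the example.
_sketch_fmt = (
    "SELECT * FROM `{project_id}.{us_xx_specific_dataset}.person` person\n"
    "UNION ALL\n"
    "SELECT * FROM `{project_id}.{us_yy_specific_dataset}.person` person"
)
_sketch_kwargs = {
    "us_xx_specific_dataset": "us_xx_state",
    "us_yy_specific_dataset": "us_yy_state",
}
_sketch_rendered = _sketch_fmt.format(project_id="some-project-id", **_sketch_kwargs)
assert "`some-project-id.us_xx_state.person`" in _sketch_rendered
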
Example #13
def export_view_data_to_cloud_storage(
    export_job_name: str,
    state_code: Optional[str] = None,
    override_view_exporter: Optional[BigQueryViewExporter] = None,
    should_materialize_views: Optional[bool] = True,
    destination_override: Optional[str] = None,
    dataset_overrides: Optional[Dict[str, str]] = None,
) -> None:
    """Exports data in BigQuery metric views to cloud storage buckets.

    Optionally takes in a BigQueryViewExporter for performing the export operation. If none is provided, this defaults
    to using a CompositeBigQueryViewExporter with delegates of JsonLinesBigQueryViewExporter and
    OptimizedMetricBigQueryViewExporter.
    """

    project_id = metadata.project_id()

    export_configs_for_filter = get_configs_for_export_name(
        export_name=export_job_name,
        state_code=state_code,
        project_id=project_id,
        destination_override=destination_override,
        dataset_overrides=dataset_overrides,
    )

    if should_materialize_views:
        rematerialize_views_for_metric_export(
            export_view_configs=export_configs_for_filter)

    do_metric_export_for_configs(
        export_configs={export_job_name: export_configs_for_filter},
        override_view_exporter=override_view_exporter,
        state_code_filter=state_code,
    )
Example #14
def gcsfs_direct_ingest_storage_directory_path_for_region(
    *,
    region_code: str,
    system_level: SystemLevel,
    ingest_instance: DirectIngestInstance,
    file_type: Optional[GcsfsDirectIngestFileType] = None,
    project_id: Optional[str] = None,
) -> GcsfsDirectoryPath:
    if project_id is None:
        project_id = metadata.project_id()
        if not project_id:
            raise ValueError("Project id not set")

    suffix = bucket_suffix_for_ingest_instance(ingest_instance)
    bucket_name = build_ingest_storage_bucket_name(
        project_id=project_id,
        system_level_str=system_level.value.lower(),
        suffix=suffix,
    )
    storage_bucket = GcsfsBucketPath(bucket_name)

    if file_type is not None:
        subdir = os.path.join(region_code.lower(), file_type.value)
    else:
        subdir = region_code.lower()
    return GcsfsDirectoryPath.from_dir_and_subdir(storage_bucket, subdir)
Example #15
def gcsfs_direct_ingest_temporary_output_directory_path(project_id: Optional[str] = None) -> str:
    if project_id is None:
        project_id = metadata.project_id()
        if not project_id:
            raise ValueError("Project id not set")

    return f'{project_id}-direct-ingest-temporary-files'
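
# Hedged usage note for the helper above: with an explicit project id the metadata
# lookup is skipped, so (hypothetical project name)
#   gcsfs_direct_ingest_temporary_output_directory_path(project_id="some-project-id")
# returns "some-project-id-direct-ingest-temporary-files".
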
def build_path(bucket_template: str, state: str,
               pdf_name: str) -> GcsfsFilePath:
    return GcsfsFilePath.from_directory_and_file_name(
        GcsfsDirectoryPath(bucket_template.format(metadata.project_id()),
                           state),
        pdf_name,
    )
Example #17
def main(database: SchemaType, repo_root: str, ssl_cert_path: str) -> None:
    """
    Invokes the main code path for running a downgrade.

    This checks for user validations that the database and branches are correct and then runs the downgrade
    migration.
    """

    is_prod = metadata.project_id() == GCP_PROJECT_PRODUCTION
    if is_prod:
        logging.info("RUNNING AGAINST PRODUCTION\n")

    prompt_for_confirmation("This script will run a DOWNGRADE migration.", "DOWNGRADE")
    confirm_correct_db(database)
    confirm_correct_git_branch(repo_root, is_prod=is_prod)

    overriden_env_vars = SQLAlchemyEngineManager.update_sqlalchemy_env_vars(
        database,
        ssl_cert_path=ssl_cert_path,
        migration_user=True,
    )

    # Run downgrade
    try:
        config = alembic.config.Config(
            SQLAlchemyEngineManager.get_alembic_file(database)
        )
        alembic.command.downgrade(config, "-1")
    except Exception as e:
        logging.error("Downgrade failed to run: %s", e)
        sys.exit(1)
    finally:
        local_postgres_helpers.restore_local_env_vars(overriden_env_vars)
Example #18
def _get_thresholds_for_system_level(system_level: SystemLevel,
                                     region_code: str) -> Dict[str, float]:
    """Returns the dictionary of error thresholds for a given system level."""
    error_thresholds = SYSTEM_TYPE_TO_ERROR_THRESHOLD.get(system_level)

    if error_thresholds is None:
        raise ValueError(
            f"Found no error thresholds associated with `system_level=[{system_level}]`"
        )

    state_code: str = region_code.upper()

    # Override the entity matching threshold from the default value, if applicable.
    project_id = metadata.project_id()
    if (not project_id or project_id
            not in STATE_CODE_TO_ENTITY_MATCHING_THRESHOLD_OVERRIDE):
        raise ValueError(
            f"Unexpected project id [{project_id}] - must be one of "
            f"{STATE_CODE_TO_ENTITY_MATCHING_THRESHOLD_OVERRIDE.keys()}.")

    thresholds_for_project = STATE_CODE_TO_ENTITY_MATCHING_THRESHOLD_OVERRIDE[
        project_id]
    if state_code in thresholds_for_project:
        state_specific_threshold = thresholds_for_project[state_code]
        if state_specific_threshold is None:
            raise ValueError(
                f"Override unexpectedly None for state_code [{state_code}].")
        error_thresholds[ENTITY_MATCHING_THRESHOLD] = state_specific_threshold
    return error_thresholds
    def __init__(self,
                 *,
                 project_id: Optional[str] = None,
                 dataset_id: str,
                 view_id: str,
                 view_query_template: str,
                 materialized_view_table_id: Optional[str] = None,
                 **query_format_kwargs):

        if project_id is None:
            project_id = metadata.project_id()

            if not project_id:
                # BigQueryViews are sometimes declared as top-level objects that are instantiated with file load. This
                # means this constructor might execute inside of an import and before a test has a chance to mock the
                # project id. This keeps us from constructing a DatasetReference with a None project_id, which will
                # throw.
                project_id = test_only_project_id()

        dataset_ref = bigquery.DatasetReference.from_string(dataset_id,
                                                            default_project=project_id)
        super().__init__(dataset_ref, view_id)
        self._view_id = view_id
        self._view_query = view_query_template.format(**self._query_format_args(**query_format_kwargs))
        self._materialized_view_table_id = materialized_view_table_id
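
# The view_query_template above is rendered with str.format using the supplied query
# kwargs (plus keys the class injects via _query_format_args). A stand-alone
# illustration with made-up keys, not the real formatting logic:
_sketch_template = "SELECT * FROM `{project_id}.{base_dataset}.{table_id}`"
_sketch_view_query = _sketch_template.format(
    project_id="some-project-id",
    base_dataset="state",
    table_id="state_person",
)
assert _sketch_view_query == "SELECT * FROM `some-project-id.state.state_person`"
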
Example #20
    def get_batch_ids(
            self,
            state_code: StateCode,
            *,
            override_fs: Optional[GCSFileSystem] = None) -> List[str]:
        """Returns a sorted list of batch id numbers from the a specific state bucket from GCS"""
        if in_development():
            project_id = GCP_PROJECT_STAGING
        else:
            project_id = metadata.project_id()
        if override_fs is None:
            buckets = self.monthly_reports_gcsfs.ls_with_blob_prefix(
                bucket_name=f"{project_id}-report-html",
                blob_prefix=state_code.value,
            )
        else:
            buckets = override_fs.ls_with_blob_prefix(
                bucket_name=f"{project_id}-report-html",
                blob_prefix=state_code.value,
            )
        files = [file for file in buckets if isinstance(file, GcsfsFilePath)]
        batch_ids = list(
            {batch_id.blob_name.split("/")[1]
             for batch_id in files})
        batch_ids.sort(reverse=True)

        return batch_ids
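
# Stdlib-only sketch of the batch id extraction above: take the second path segment of
# each blob name, de-duplicate, and sort newest-first. Blob names are invented.
_sketch_blob_names = [
    "US_XX/20210501120000/report.html",
    "US_XX/20210501120000/attachments/logo.png",
    "US_XX/20210401090000/report.html",
]
_sketch_batch_ids = sorted(
    {name.split("/")[1] for name in _sketch_blob_names}, reverse=True
)
assert _sketch_batch_ids == ["20210501120000", "20210401090000"]
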
Example #21
    def query_str_for_region_code(self, region_code: str) -> str:
        return "SELECT * FROM `{project_id}.{dataset}.{table}` " \
               " WHERE region_code = '{region_code}'" \
            .format(project_id=metadata.project_id(),
                    dataset=dataset_config.VIEWS_DATASET,
                    table=self.view.view_id,
                    region_code=region_code)
    def __init__(
            self,
            cloud_tasks_client: Optional[tasks_v2.CloudTasksClient] = None,
            project_id: Optional[str] = None):
        self.client = cloud_tasks_client \
            if cloud_tasks_client else tasks_v2.CloudTasksClient()
        self.project_id = project_id if project_id else metadata.project_id()
        self.queues_region = QUEUES_REGION
Example #23
    def auth_and_call(*args: Any, **kwargs: Any) -> Any:
        """Authenticates the inbound request and delegates.

        Args:
            *args: args to the function
            **kwargs: keyword args to the function

        Returns:
            The output of the function, if successfully authenticated.
            An error or redirect response, otherwise.
        """
        if in_development():
            # Bypass GAE auth check in development.
            return func(*args, **kwargs)

        is_cron = request.headers.get("X-Appengine-Cron")
        is_task = request.headers.get("X-AppEngine-QueueName")
        incoming_app_id = request.headers.get("X-Appengine-Inbound-Appid")
        jwt = request.headers.get("x-goog-iap-jwt-assertion")

        project_id = metadata.project_id()
        project_number = metadata.project_number()

        if is_cron:
            logging.info("Requester is one of our cron jobs, proceeding.")

        elif is_task:
            logging.info("Requester is the taskqueue, proceeding.")

        elif incoming_app_id:
            # Check whether this is an intra-app call from our GAE service
            logging.info("Requester authenticated as app-id: [%s].", incoming_app_id)

            if incoming_app_id == project_id:
                logging.info("Authenticated intra-app call, proceeding.")
            else:
                logging.info("App ID is [%s], not allowed - exiting.", incoming_app_id)
                return (
                    "Failed: Unauthorized external request.",
                    HTTPStatus.UNAUTHORIZED,
                )
        elif jwt:
            (
                user_id,
                user_email,
                error_str,
            ) = validate_jwt.validate_iap_jwt_from_app_engine(
                jwt, project_number, project_id
            )
            logging.info("Requester authenticated as [%s] ([%s]).", user_id, user_email)
            if error_str:
                logging.info("Error validating user credentials: [%s].", error_str)
                return ("Error: %s" % error_str, HTTPStatus.UNAUTHORIZED)
        else:
            return ("Failed: Unauthorized external request.", HTTPStatus.UNAUTHORIZED)

        # If we made it this far, client is authorized - run the decorated func
        return func(*args, **kwargs)
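
# auth_and_call above is the inner wrapper of a decorator. A hedged sketch of how such
# a decorator is typically assembled (functools.wraps plus returning the wrapper); the
# decorator name is illustrative, not necessarily the real one.
import functools
from typing import Any, Callable


def _sketch_authenticate_request(func: Callable[..., Any]) -> Callable[..., Any]:
    @functools.wraps(func)
    def auth_and_call(*args: Any, **kwargs: Any) -> Any:
        # ... header checks as in the example above ...
        return func(*args, **kwargs)

    return auth_and_call
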
Example #24
def state_aggregate() -> Tuple[str, HTTPStatus]:
    """Calls state aggregates"""

    # Please add new states in alphabetical order
    state_to_parser = {
        'california': ca_aggregate_ingest.parse,
        'florida': fl_aggregate_ingest.parse,
        'georgia': ga_aggregate_ingest.parse,
        'hawaii': hi_aggregate_ingest.parse,
        'kentucky': ky_aggregate_ingest.parse,
        'new_york': ny_aggregate_ingest.parse,
        'pennsylvania': pa_aggregate_ingest.parse,
        'tennessee': tn_aggregate_ingest.parse,
        'texas': tx_aggregate_ingest.parse,
    }

    bucket = get_str_param_value('bucket', request.args)
    state = get_str_param_value('state', request.args)
    filename = get_str_param_value('filename', request.args)
    project_id = metadata.project_id()
    logging.info("The project id is %s", project_id)
    if not bucket or not state or not filename:
        raise StateAggregateError(
            "All of state, bucket, and filename must be provided")
    path = os.path.join(bucket, state, filename)
    parser = state_to_parser[state]
    # Don't use the gcsfs cache
    fs = gcsfs.GCSFileSystem(project=project_id,
                             cache_timeout=GCSFS_NO_CACHING)
    logging.info("The path to download from is %s", path)

    # TODO(#3292): Uncomment once gcsfs.ls is more stable
    # bucket_path = os.path.join(bucket, state)
    # logging.info("The files in the directory are:")
    # logging.info(fs.ls(bucket_path))

    # Providing a stream buffer to tabula reader does not work because it
    # tries to load the file into the local filesystem, since appengine is a
    # read only filesystem (except for the tmpdir) we download the file into
    # the local tmpdir and pass that in.
    tmpdir_path = os.path.join(tempfile.gettempdir(), filename)
    fs.get(path, tmpdir_path)
    logging.info("Successfully downloaded file from gcs: %s", path)

    try:
        result = parser(os.path.join(bucket, state), tmpdir_path)
        logging.info('Successfully parsed the report')
        for table, df in result.items():
            dao.write_df(table, df)

        # If we are successful, we want to move the file out of the cloud
        # function triggered directory, and into the historical path.
        historical_path = os.path.join(HISTORICAL_BUCKET.format(project_id),
                                       state, filename)
        fs.mv(path, historical_path)
        return '', HTTPStatus.OK
    except Exception as e:
        return jsonify(str(e)), HTTPStatus.INTERNAL_SERVER_ERROR
Example #25
def update_long_term_backups() -> Tuple[str, HTTPStatus]:
    """Create a new manual backup and delete the oldest manual backup once the
    maximum number has been reached
    """
    credentials, _ = google.auth.default()
    client = discovery.build('sqladmin', 'v1beta4', credentials=credentials)

    project_id = metadata.project_id()
    instance_id = _get_cloudsql_instance_id()

    logging.info('Creating request for backup insert operation')
    insert_request = client.backupRuns().insert(project=project_id,
                                                instance=instance_id,
                                                body={})

    logging.info('Beginning backup insert operation')
    insert_operation = insert_request.execute()
    _await_operation(client, project_id, insert_operation['name'])
    _throw_if_error(client, project_id, insert_operation['name'], 'insert')
    logging.info('Backup insert operation completed')

    logging.info('Creating request for backup list operation')
    list_request = client.backupRuns().list(project=project_id,
                                            instance=instance_id)

    logging.info('Beginning backup list request')
    list_result = list_request.execute()
    backup_runs = list_result['items']
    manual_backup_runs = [
        backup_run for backup_run in backup_runs
        if backup_run['type'] == 'ON_DEMAND'
    ]
    logging.info(
        'Backup list request completed with [%s] total backup runs '
        'and [%s] manual backup runs', str(len(backup_runs)),
        str(len(manual_backup_runs)))

    if len(manual_backup_runs) > _MAX_COUNT_MANUAL_BACKUPS:
        # startTime is a string with format yyyy-mm-dd, so sorting it as a
        # string will give the same result as converting it to a date and then
        # sorting by date
        manual_backup_runs.sort(key=lambda backup_run: backup_run['startTime'])
        oldest_manual_backup = manual_backup_runs[0]
        oldest_manual_backup_id = oldest_manual_backup['id']

        logging.info('Creating request for backup delete operation')
        delete_request = client.backupRuns().delete(project=project_id,
                                                    instance=instance_id,
                                                    id=oldest_manual_backup_id)

        logging.info('Beginning backup delete operation')
        delete_operation = delete_request.execute()
        _await_operation(client, project_id, delete_operation['name'])
        _throw_if_error(client, project_id, delete_operation['name'], 'delete')
        logging.info('Backup delete operation completed')

    logging.info('All backup operations completed successfully')
    return ('', HTTPStatus.OK)
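
# Quick stdlib check of the claim in the comment above: "yyyy-mm-dd" strings sort the
# same way as the dates they represent, so the oldest manual backup sorts first.
_sketch_runs = [{"startTime": "2021-03-02"}, {"startTime": "2020-12-31"}]
_sketch_runs.sort(key=lambda run: run["startTime"])
assert _sketch_runs[0]["startTime"] == "2020-12-31"
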
Example #26
def format_task_path(queue_name: str, task_name: str):
    """Creates a task path out of the necessary parts.

    Task path is of the form:
        '/projects/{project}/locations/{location}'
        '/queues/{queue}/tasks/{task_name}'
    """
    return client().task_path(metadata.project_id(), metadata.region(),
                              queue_name, task_name)
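
# Per the docstring above, the assembled path concatenates project, location, queue,
# and task name; with placeholder values it would look roughly like:
#   "projects/some-project-id/locations/us-east1/queues/some-queue/tasks/some-task"
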
def gcsfs_direct_ingest_storage_directory_path_for_region(
        region_code: str, system_level: SystemLevel) -> str:
    project_id = metadata.project_id()
    if not project_id:
        raise ValueError("Project id not set")

    storage_bucket = \
        f'{project_id}-direct-ingest-{system_level.value.lower()}-storage'
    return os.path.join(storage_bucket, region_code)
Example #28
def stats():
    global _stats
    if not _stats:
        new_stats = stats_module.Stats()
        if environment.in_gae() and not environment.in_test():
            exporter = stackdriver.new_stats_exporter(
                stackdriver.Options(project_id=metadata.project_id(), ))
            new_stats.view_manager.register_exporter(exporter)
        _stats = new_stats
    return _stats
Example #29
    def __init__(self, project_id: Optional[str] = None):
        if not project_id:
            project_id = metadata.project_id()

        if not project_id:
            raise ValueError(
                'Must provide a project_id if metadata.project_id() returns None'
            )

        self._project_id = project_id
        self.client = client(self._project_id)
    def __init__(self,
                 region_name: str,
                 system_level: SystemLevel,
                 ingest_directory_path: Optional[str],
                 storage_directory_path: Optional[str],
                 max_delay_sec_between_files: Optional[int] = None):
        super().__init__(region_name, system_level, ingest_directory_path,
                         storage_directory_path, max_delay_sec_between_files)
        self.csv_reader = GcsfsCsvReader(
            gcsfs.GCSFileSystem(project=metadata.project_id(),
                                cache_timeout=GCSFS_NO_CACHING))