def _copy_to_ingest_bucket(
    self,
    path: str,
    full_file_upload_path: GcsfsFilePath,
) -> None:
    """Moves a file within GCS to the appropriate bucket if it has not already
    been deemed processed or discovered by the file metadata manager.

    We check discovered because a file may have been discovered and be awaiting
    ingest, in which case we should not re-upload it. We check processed because
    a file may have already been ingested and then deleted from the bucket.
    """
    if not self.postgres_direct_ingest_file_metadata_manager.has_raw_file_been_discovered(
        full_file_upload_path
    ) and not self.postgres_direct_ingest_file_metadata_manager.has_raw_file_been_processed(
        full_file_upload_path
    ):
        try:
            # Preserve the original content type (e.g. text/csv) on the moved file.
            mimetype, _ = guess_type(os.path.basename(path))
            self.gcsfs.mv(
                src_path=GcsfsFilePath.from_absolute_path(path),
                dst_path=full_file_upload_path,
            )
            self.gcsfs.set_content_type(
                full_file_upload_path, mimetype if mimetype else "text/plain"
            )
            logging.info("Copied %s -> %s", path, full_file_upload_path.uri())
            self.uploaded_files.append(path)
        except BaseException as e:
            logging.warning(
                "Could not copy %s -> %s due to error %s",
                path,
                full_file_upload_path.uri(),
                e.args,
            )
            self.unable_to_upload_files.append(path)
    else:
        logging.info(
            "Skipping %s -> %s, file already discovered or processed",
            path,
            full_file_upload_path.uri(),
        )
        self.skipped_files.append(path)
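# A standalone sketch of the content-type fallback used above: mimetypes.guess_type
# returns (None, None) for unrecognized extensions, which is why the code falls
# back to "text/plain". The file name here is illustrative.
import os
from mimetypes import guess_type

mimetype, _ = guess_type(os.path.basename("raw/us_xx_report.csv"))
content_type = mimetype if mimetype else "text/plain"  # -> "text/csv"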
def _copy_to_ingest_bucket(
    self,
    path: str,
    full_file_upload_path: GcsfsFilePath,
) -> None:
    if not self.dry_run:
        try:
            gsutil_cp(path, full_file_upload_path.uri())
            self.uploaded_files.append(path)
            self.copies_list.append((path, full_file_upload_path.uri()))
        except ValueError:
            self.unable_to_upload_files.append(path)
    else:
        self.copies_list.append((path, full_file_upload_path.uri()))
    with self.mutex:
        if self.move_progress:
            # pylint: disable=not-callable
            self.move_progress.next()
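# A minimal sketch of the mutex-guarded progress update above, assuming the
# progress bar is the `progress` package's Bar (the .next() call and the pylint
# hint suggest this, but it is an assumption). All names here are illustrative.
import threading

from progress.bar import Bar

mutex = threading.Lock()
move_progress = Bar("Copying files", max=100)

def record_one_copy() -> None:
    # Called from worker threads; the lock keeps concurrent .next() calls from
    # interleaving their terminal output.
    with mutex:
        move_progress.next()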
def _copy_to_ingest_bucket(
    self, path: str, full_file_upload_path: GcsfsFilePath
) -> None:
    try:
        mimetype, _ = guess_type(os.path.basename(path))
        self.gcsfs.mv(
            src_path=GcsfsFilePath.from_absolute_path(path),
            dst_path=full_file_upload_path,
        )
        self.gcsfs.set_content_type(
            full_file_upload_path, mimetype if mimetype else "text/plain"
        )
        logging.info("Copied %s -> %s", path, full_file_upload_path.uri())
        self.uploaded_files.append(path)
    except BaseException as e:
        logging.warning(
            "Could not copy %s -> %s due to error %s",
            path,
            full_file_upload_path.uri(),
            e.args,
        )
        self.unable_to_upload_files.append(path)
def _file_pointer_for_path(self, path: GcsfsFilePath, encoding: str):
    """Returns a file pointer for the given path."""
    # From the GCSFileSystem docs
    # (https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem),
    # 'google_default' means we should look for local credentials set up via
    # `gcloud auth login`. The project being read from may have to match the
    # project default set locally (check via `gcloud info`, set via
    # `gcloud config set project [PROJECT_ID]`). If we are running in the GAE
    # environment, we should be able to get credentials from the internal
    # metadata service.
    token = 'google_default' if not environment.in_gae() else 'cloud'
    return self.gcs_file_system.open(path.uri(), encoding=encoding, token=token)
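# A minimal sketch of the credential selection described in the comment above,
# using gcsfs directly; the helper name and the in_gae flag are illustrative.
import gcsfs

def make_gcs_file_system(in_gae: bool) -> gcsfs.GCSFileSystem:
    # 'google_default' -> credentials from a local `gcloud auth login`;
    # 'cloud' -> credentials from the GCP instance metadata service.
    token = "cloud" if in_gae else "google_default"
    return gcsfs.GCSFileSystem(token=token)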
def open(
    self,
    path: GcsfsFilePath,
    chunk_size: Optional[int] = None,
    encoding: Optional[str] = None,
) -> Iterator[TextIO]:
    blob = self._get_blob(path)
    with blob.open("rb", chunk_size=chunk_size) as f:
        verifiable_reader = VerifiableBytesReader(f, name=path.uri())
        try:
            yield TextIOWrapper(buffer=verifiable_reader, encoding=encoding)
        finally:
            # Confirm the bytes actually read match the checksum GCS reports for the blob.
            verifiable_reader.verify_crc32c(blob.crc32c)
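# The byte-to-text wrapping above, shown in isolation: TextIOWrapper turns a
# binary stream (here a local BytesIO standing in for blob.open("rb")) into a
# text stream with the requested encoding. The sample data is illustrative.
from io import BytesIO, TextIOWrapper

raw = BytesIO(b"col_a,col_b\n1,2\n")
with TextIOWrapper(buffer=raw, encoding="utf-8") as text_stream:
    header = text_stream.readline()  # "col_a,col_b\n"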
def _get_blob(self, path: GcsfsFilePath) -> storage.Blob:
    try:
        bucket = self.storage_client.bucket(path.bucket_name)
        blob = bucket.get_blob(path.blob_name)
    except NotFound as error:
        logging.warning(
            "Blob at [%s] does not exist - might have already been deleted",
            path.uri(),
        )
        raise GCSBlobDoesNotExistError(
            f"Blob at [{path.uri()}] does not exist"
        ) from error
    else:
        if not blob:
            logging.warning(
                "Blob at [%s] does not exist - might have already been deleted",
                path.uri(),
            )
            raise GCSBlobDoesNotExistError(f"Blob at [{path.uri()}] does not exist")
        return blob
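# A sketch of the lookup behavior the branches above guard against, using
# google-cloud-storage directly: Bucket.get_blob() returns None when the object
# is missing rather than raising, so the `if not blob` check is needed in
# addition to the NotFound handler. Names are illustrative and this requires
# real GCS credentials to run.
from google.cloud import storage

client = storage.Client()
blob = client.bucket("some-bucket").get_blob("some/object.csv")
if blob is None:
    print("object not found")
else:
    print(f"object found, crc32c={blob.crc32c}")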
def cache_ingest_file(
    self, path: GcsfsFilePath, csv_text: str, separator: str = ","
) -> None:
    self.fs.upload_from_string(path, csv_text, content_type="text/csv")
    response = self.test_client.post(
        "/data_discovery/cache_ingest_file_as_parquet_task",
        json={
            "gcs_file_uri": path.uri(),
            "file_encoding": "UTF-8",
            "file_separator": separator,
            "file_quoting": csv.QUOTE_MINIMAL,
        },
    )
    self.assertEqual(HTTPStatus.CREATED, response.status_code)
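# A small aside on the file_quoting field sent above: the csv.QUOTE_* constants
# are plain ints, so they serialize directly into the JSON request body and can
# be passed back into csv readers on the receiving side.
import csv

assert isinstance(csv.QUOTE_MINIMAL, int)
print(csv.QUOTE_MINIMAL, csv.QUOTE_ALL, csv.QUOTE_NONNUMERIC, csv.QUOTE_NONE)  # 0 1 2 3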
def build_cache_ingest_file_as_parquet_task(
    gcs_file: GcsfsFilePath,
    separator: str,
    encoding: str,
    quoting: int,
    custom_line_terminator: Optional[str],
) -> Dict[str, Any]:
    body = {
        "gcs_file_uri": gcs_file.uri(),
        "file_separator": separator,
        "file_encoding": encoding,
        "file_quoting": quoting,
        "file_custom_line_terminator": custom_line_terminator,
    }
    return {
        "relative_uri": "/admin/data_discovery/cache_ingest_file_as_parquet_task",
        "body": body,
    }
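# An illustrative call to the builder above, reusing GcsfsFilePath.from_absolute_path
# as seen in the other snippets (its import path is omitted here); the bucket and
# file name are made up.
import csv

task = build_cache_ingest_file_as_parquet_task(
    gcs_file=GcsfsFilePath.from_absolute_path("gs://my-ingest-bucket/raw/us_xx_file.csv"),
    separator=",",
    encoding="UTF-8",
    quoting=csv.QUOTE_MINIMAL,
    custom_line_terminator=None,
)
# task["relative_uri"] == "/admin/data_discovery/cache_ingest_file_as_parquet_task"
# task["body"]["gcs_file_uri"] == "gs://my-ingest-bucket/raw/us_xx_file.csv"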