Ejemplo n.º 1
0
 def credentials(self) -> Optional[auth_credentials.Credentials]:
     """Return the configured credentials, or the environment's default ones.

     If ``self._credentials`` is truthy it is returned unchanged. Otherwise
     the credentials are resolved through ``google.auth.default()``. For the
     duration of that call a ``utils.LoggingFilter(logging.WARNING)`` is
     attached to the ``google.auth._default`` logger (presumably to silence
     its warnings -- confirm against ``utils.LoggingFilter``).

     Returns:
         The explicitly configured credentials, or the first element of the
         tuple returned by ``google.auth.default()``.
     """
     if self._credentials:
         return self._credentials

     logger = logging.getLogger("google.auth._default")
     logging_warning_filter = utils.LoggingFilter(logging.WARNING)
     logger.addFilter(logging_warning_filter)
     try:
         credentials, _ = google.auth.default()
     finally:
         # The logger is process-wide: detach the filter even when default()
         # raises, otherwise it would stay installed for every later caller.
         logger.removeFilter(logging_warning_filter)
     return credentials
    def _retrieve_gcs_source_columns(
        project: str,
        gcs_csv_file_path: str,
        credentials: Optional[auth_credentials.Credentials] = None,
    ) -> List[str]:
        """Retrieve the column names of a comma-delimited CSV file stored on Google Cloud Storage.

        The file is downloaded in fixed-size byte ranges only until the first
        newline (or the end of the file) has been seen; the bytes before that
        newline are decoded as UTF-8 and parsed as a single CSV record.

        Example Usage:

            column_names = _retrieve_gcs_source_columns(
                "project_id",
                "gs://example-bucket/path/to/csv_file"
            )

            # column_names = ["column_1", "column_2"]

        Args:
            project (str):
                Required. Project to initiate the Google Cloud Storage client with.
            gcs_csv_file_path (str):
                Required. A full path to a CSV file stored on Google Cloud Storage.
                Must include "gs://" prefix.
            credentials (auth_credentials.Credentials):
                Credentials to use with the GCS client.
        Returns:
            List[str]
                A list of column names in the CSV file.

        Raises:
            RuntimeError: When the header cannot be extracted, e.g. a
                ValueError (such as invalid UTF-8) or RuntimeError was raised
                while downloading or decoding it.
        """

        gcs_bucket, gcs_blob = utils.extract_bucket_and_prefix_from_gcs_path(
            gcs_csv_file_path)
        client = storage.Client(project=project, credentials=credentials)
        bucket = client.bucket(gcs_bucket)
        blob = bucket.blob(gcs_blob)

        # Incrementally download the CSV file until the header is retrieved
        increment = 1000
        start_index = 0
        first_new_line_index = -1
        # Raw bytes are accumulated and decoded once at the end, so a
        # multi-byte UTF-8 character split across two ranges cannot break
        # decoding.
        header_bytes = bytearray()

        # Set up the filter before the try block so the finally clause can
        # always reference both names.
        logger = logging.getLogger("google.resumable_media._helpers")
        logging_warning_filter = utils.LoggingFilter(logging.INFO)
        logger.addFilter(logging_warning_filter)

        try:
            while first_new_line_index == -1:
                # `end` is the last byte downloaded (inclusive), hence the -1:
                # without it every range would overlap the next by one byte.
                chunk = blob.download_as_bytes(start=start_index,
                                               end=start_index + increment - 1)

                # Only scan the newly appended bytes for the newline.
                search_from = len(header_bytes)
                header_bytes += chunk
                first_new_line_index = header_bytes.find(b"\n", search_from)

                if len(chunk) < increment:
                    # Short read: the object ended. A file without any
                    # newline is treated as consisting of the header only.
                    break
                start_index += increment

            if first_new_line_index != -1:
                del header_bytes[first_new_line_index:]

            header_line = header_bytes.decode("utf-8")

            # csv.reader expects an iterable of lines
            csv_reader = csv.reader([header_line], delimiter=",")
        except (ValueError, RuntimeError) as err:
            raise RuntimeError(
                "There was a problem extracting the headers from the CSV file at '{}': {}"
                .format(gcs_csv_file_path, err)) from err
        finally:
            logger.removeFilter(logging_warning_filter)

        return next(csv_reader)