def credentials(self) -> Optional[auth_credentials.Credentials]:
    """Default credentials.

    Returns:
        ``self._credentials`` when it is set (truthy); otherwise the
        credentials object returned by ``google.auth.default()``.
    """
    if self._credentials:
        return self._credentials

    # A utils.LoggingFilter(logging.WARNING) is attached to the
    # "google.auth._default" logger only for the duration of the lookup
    # (presumably to keep warnings from that module out of the output while
    # default credentials are resolved -- confirm against utils.LoggingFilter).
    logger = logging.getLogger("google.auth._default")
    logging_warning_filter = utils.LoggingFilter(logging.WARNING)
    logger.addFilter(logging_warning_filter)
    try:
        default_credentials, _ = google.auth.default()
    finally:
        # Detach the filter even when the lookup raises; otherwise a failed
        # call would leave the shared logger filtered for the rest of the
        # process.
        logger.removeFilter(logging_warning_filter)

    return default_credentials
def _retrieve_gcs_source_columns(
    project: str,
    gcs_csv_file_path: str,
    credentials: Optional[auth_credentials.Credentials] = None,
) -> List[str]:
    """Retrieve the columns from a comma-delimited CSV file stored on Google Cloud Storage

    Only the leading part of the object is downloaded: it is fetched in
    fixed-size byte ranges until the first line break has been seen (or the
    end of the object is reached), and that first line is parsed as the header.

    Example Usage:

        column_names = _retrieve_gcs_source_columns(
            "project_id",
            "gs://example-bucket/path/to/csv_file"
        )

        # column_names = ["column_1", "column_2"]

    Args:
        project (str):
            Required. Project to initiate the Google Cloud Storage client with.
        gcs_csv_file_path (str):
            Required. A full path to a CSV file stored on Google Cloud Storage.
            Must include "gs://" prefix.
        credentials (auth_credentials.Credentials):
            Credentials to use with the GCS Client.

    Returns:
        List[str]
            A list of column names in the CSV file.

    Raises:
        RuntimeError: When the header cannot be extracted from the CSV file,
            e.g. because its first line is not valid UTF-8.
    """
    gcs_bucket, gcs_blob = utils.extract_bucket_and_prefix_from_gcs_path(
        gcs_csv_file_path
    )
    client = storage.Client(project=project, credentials=credentials)
    bucket = client.bucket(gcs_bucket)
    blob = bucket.blob(gcs_blob)

    # Incrementally download the CSV file until the header is retrieved.
    # Raw bytes are accumulated and decoded once at the end, so a multi-byte
    # UTF-8 character that straddles a chunk boundary is never decoded in
    # halves.
    first_new_line_index = -1
    start_index = 0
    increment = 1000
    header_bytes = bytearray()

    # Bound before the ``try`` so the ``finally`` clause can always reference
    # them. The filter is attached only while the download loop runs.
    logger = logging.getLogger("google.resumable_media._helpers")
    logging_warning_filter = utils.LoggingFilter(logging.INFO)
    logger.addFilter(logging_warning_filter)

    try:
        while first_new_line_index == -1:
            # ``end`` is an inclusive byte offset, hence the ``- 1``: each
            # request covers exactly ``increment`` bytes and consecutive
            # ranges do not overlap.
            chunk = blob.download_as_bytes(
                start=start_index, end=start_index + increment - 1
            )
            header_bytes += chunk

            # Everything before ``start_index`` was already searched.
            first_new_line_index = header_bytes.find(b"\n", start_index)
            start_index += increment

            # A short chunk means the end of the object was reached; a file
            # without any line break consists of the header line only.
            if len(chunk) < increment:
                break

        if first_new_line_index != -1:
            del header_bytes[first_new_line_index:]

        header_line = header_bytes.decode("utf-8")

        # csv.reader expects an iterable of lines.
        csv_reader = csv.reader([header_line], delimiter=",")
    except (ValueError, RuntimeError) as err:
        raise RuntimeError(
            "There was a problem extracting the headers from the CSV file at '{}': {}"
            .format(gcs_csv_file_path, err)
        ) from err
    finally:
        logger.removeFilter(logging_warning_filter)

    return next(csv_reader)