def download_blob(self, bucket_name, blob_name, local_path=None):
    """
    Download a blob from a bucket into a local file.

    `Args:`
        bucket_name: str
            The name of the bucket
        blob_name: str
            The name of the blob
        local_path: str
            The local path where the file will be downloaded. If not specified, a temporary
            file will be created and returned, and that file will be removed automatically
            when the script is done running.
    `Returns:`
        str
            The path of the downloaded file
    """
    # Fall back to a temp file only when the caller gave no destination.
    destination = local_path or files.create_temp_file_for_path('TEMPTHING')

    source_bucket = storage.Bucket(self.client, name=bucket_name)
    source_blob = storage.Blob(blob_name, source_bucket)

    logger.info(f'Downloading {blob_name} from {bucket_name} bucket.')
    with open(destination, 'wb') as out_file:
        source_blob.download_to_file(out_file, client=self.client)
    logger.info(f'{blob_name} saved to {destination}.')

    return destination
def download_blob(self, container_name, blob_name, local_path=None):
    """
    Download a blob from a container, writing it to the given file path or to a
    temporary file path.

    `Args:`
        container_name: str
            The container name
        blob_name: str
            The blob name
        local_path: Optional[str]
            The local path where the file will be downloaded. If not specified, a temporary
            file will be created and returned, and that file will be removed automatically
            when the script is done running.
    `Returns:`
        str
            The path of the downloaded file
    """
    # Fall back to a temp file only when the caller gave no destination.
    destination = local_path or files.create_temp_file_for_path('TEMPFILEAZURE')

    client_for_blob = self.get_blob(container_name, blob_name)

    logger.info(f'Downloading {blob_name} blob from {container_name} container.')
    with open(destination, 'wb') as out_file:
        downloader = client_for_blob.download_blob()
        downloader.readinto(out_file)
    logger.info(f'{blob_name} blob saved to {destination}.')

    return destination
def get_file(self, remote_path, local_path=None, connection=None):
    """
    Download a file from the SFTP server

    `Args:`
        remote_path: str
            The remote path of the file to download
        local_path: str
            The local path where the file will be downloaded. If not specified, a temporary
            file will be created and returned, and that file will be removed automatically
            when the script is done running.
        connection: obj
            An SFTP connection object. When supplied it is used for the download and no
            new connection is opened.
    `Returns:`
        str
            The path of the local file
    """
    if not local_path:
        local_path = files.create_temp_file_for_path(remote_path)

    if connection:
        # Reuse the caller's connection. Without the `else` below, a second
        # connection would be opened and the file downloaded a second time.
        connection.get(remote_path, local_path)
    else:
        with self.create_connection() as new_connection:
            new_connection.get(remote_path, local_path)

    return local_path
def get_file(self, bucket, key, local_path=None, **kwargs):
    """
    Fetch an S3 object and write it to a local file.

    `Args:`
        bucket: str
            The bucket name
        key: str
            The object key
        local_path: str
            The local path where the file will be downloaded. If not specified, a temporary
            file will be created and returned, and that file will be removed automatically
            when the script is done running.
        kwargs:
            Additional arguments for the S3 API call, forwarded as ``ExtraArgs``. See
            `AWS download_file documentation <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file>`_
            for more info.
    `Returns:`
        str
            The path of the new file
    """
    # Fall back to a temp file (named after the key) when no destination is given.
    destination = local_path or files.create_temp_file_for_path(key)

    s3_object = self.s3.Object(bucket, key)
    s3_object.download_file(destination, ExtraArgs=kwargs)

    return destination
def get_file(self, bucket, key, local_path=None):
    """
    Fetch an S3 object and write it to a local file.

    `Args:`
        bucket: str
            The bucket name
        key: str
            The object key
        local_path: str
            The local path where the file will be downloaded. If not specified, a temporary
            file will be created and returned, and that file will be removed automatically
            when the script is done running.
    `Returns:`
        str
            The path of the new file
    """
    # Fall back to a temp file (named after the key) when no destination is given.
    destination = local_path or files.create_temp_file_for_path(key)

    s3_object = self.s3.Object(bucket, key)
    s3_object.download_file(destination)

    return destination
def download_file(self, path: str, local_path: str = None) -> str:
    """Download a Box object to a local file.

    `Args`:
        path: str
            The slash-separated path to the file in Box.
        local_path: str
            The local path where the file will be downloaded. If not specified, a temporary
            file will be created and returned, and that file will be removed automatically
            when the script is done running.

    `Returns:`
        str
            The path of the new file
    """
    # When no destination is supplied, write to a temp file derived from the
    # Box path; the caller gets its location back as the return value.
    destination = local_path or create_temp_file_for_path(path)

    # Resolve the Box path to an item id before opening the output file.
    box_file_id = self.get_item_id(path)
    with open(destination, 'wb') as out_handle:
        box_file = self.client.file(box_file_id)
        box_file.download_to(out_handle)

    return destination
def download_file(self, repo_name, path, branch=None, local_path=None):
    """Download a single file from a repo, identified by path and branch.

    When no branch is given, the repo's default branch is used.

    The file is fetched from its raw download URL rather than through the API:
    the API only supports contents up to 1MB from a repo directly, and the
    process for downloading larger files through the API is much more involved.

    Because the download URL does not go through the API, it does not support
    username / password authentication, and requires a token to authenticate.

    Args:
        repo_name: str
            Full repo name (account/name)
        path: str
            Path from the repo base directory
        branch: Optional[str]
            Branch to download file from. Defaults to repo default branch
        local_path: Optional[str]
            Local file path to download file to. Will create a temp file if not supplied.

    Returns:
        str
            File path of downloaded file

    Raises:
        UnknownObjectException: if the download URL responds with 404.
        ParsonsGitHubError: if the download URL responds with any other non-200 status.
    """
    destination = local_path or files.create_temp_file_for_path(path)

    # The repo is always looked up, even when a branch is supplied.
    repository = self.client.get_repo(repo_name)
    if branch is None:
        branch = repository.default_branch

    logger.info(
        f'Downloading {path} from {repo_name}, branch {branch} to {destination}'
    )

    # Only send an Authorization header when a token is configured.
    auth_headers = (
        {'Authorization': f'token {self.access_token}'}
        if self.access_token
        else None
    )

    raw_url = f'https://raw.githubusercontent.com/{repo_name}/{branch}/{path}'
    response = requests.get(raw_url, headers=auth_headers)

    status = response.status_code
    if status == 404:
        raise UnknownObjectException(status=404, data=response.content)
    if status != 200:
        raise ParsonsGitHubError(
            f'Error downloading {path} from repo {repo_name}: {response.content}'
        )

    with open(destination, 'wb') as out_file:
        out_file.write(response.content)

    logger.info(f'Downloaded {path} to {destination}')

    return destination
def test_create_temp_file_for_path():
    """The temp file created for a '.gz' path must itself end in '.gz'."""
    source_path = 'some/file.gz'
    created_path = files.create_temp_file_for_path(source_path)
    assert created_path.endswith('.gz')