def download_file(self, key, local_path, bucket_name=None, use_basename=True):
    """
    Fetch a single object from S3 and write it to a local path.

    Args:
        key: `str`. S3 key of the object. When `bucket_name` is not given, this
            value is handed to `parse_s3_url` to obtain both bucket and key.
        local_path: `str`. destination path on the local filesystem.
        bucket_name: `str`. name of the bucket that holds the object.
        use_basename: `bool`. when true, the destination is passed through
            `append_basename` together with the key.

    Raises:
        PolyaxonStoresException: if `check_dirname_exists` rejects the destination
            or the S3 client raises a `ClientError`.
    """
    error_template = "Connection error: %s"

    if not bucket_name:
        bucket_name, key = self.parse_s3_url(key)

    destination = os.path.abspath(local_path)
    if use_basename:
        destination = append_basename(destination, key)

    # Validate the destination before issuing any request.
    try:
        check_dirname_exists(destination)
    except PolyaxonPathException as path_error:
        raise PolyaxonStoresException(error_template % path_error) from path_error

    try:
        self.connection.download_file(bucket_name, key, destination)
    except ClientError as client_error:
        raise PolyaxonStoresException(error_template % client_error) from client_error
def download_file(self, blob, local_path, bucket_name=None, use_basename=True):
    """
    Fetch a single blob from Google Cloud Storage and write it to a local path.

    Args:
        blob: `str`. name of the blob to download. When `bucket_name` is not
            given, this value is handed to `parse_gcs_url` to obtain both bucket
            and blob.
        local_path: `str`. destination path on the local filesystem.
        bucket_name: `str`. name of the bucket that holds the blob.
        use_basename: `bool`. when true, the destination is passed through
            `append_basename` together with the blob name.

    Raises:
        PolyaxonStoresException: if `check_dirname_exists` rejects the destination
            or the lookup/download raises `NotFound` or `GoogleAPIError`.
    """
    error_template = "Connection error: %s"

    if not bucket_name:
        bucket_name, blob = self.parse_gcs_url(blob)

    destination = os.path.abspath(local_path)
    if use_basename:
        destination = append_basename(destination, blob)

    # Validate the destination before issuing any request.
    try:
        check_dirname_exists(destination)
    except PolyaxonPathException as path_error:
        raise PolyaxonStoresException(error_template % path_error) from path_error

    try:
        remote_blob = self.get_blob(blob=blob, bucket_name=bucket_name)
        remote_blob.download_to_filename(destination)
    except (NotFound, GoogleAPIError) as api_error:
        raise PolyaxonStoresException(error_template % api_error) from api_error
def download_file(self, blob, local_path, container_name=None, use_basename=True):
    """
    Downloads a file from Azure Blob service.

    Args:
        blob: `str`. blob to download. When `container_name` is not given, this
            value is handed to `parse_wasbs_url` to obtain container and blob.
        local_path: `str`. the path to download to.
        container_name: `str`. the name of the container.
        use_basename: `bool`. when true, the destination is passed through
            `append_basename` together with the blob name.

    Raises:
        PolyaxonStoresException: if `check_dirname_exists` rejects the destination
            or the download raises an `HttpResponseError`.
    """
    if not container_name:
        container_name, _, blob = self.parse_wasbs_url(blob)

    local_path = os.path.abspath(local_path)

    if use_basename:
        local_path = append_basename(local_path, blob)

    try:
        check_dirname_exists(local_path)
    except PolyaxonPathException as e:
        raise PolyaxonStoresException("Connection error: %s" % e) from e

    client = self.connection.get_container_client(container_name)
    try:
        with open(local_path, "wb") as file:
            client.download_blob(blob).readinto(file)
    except HttpResponseError as e:
        # The file was created/truncated by `open` before the transfer started;
        # drop it so a failed download does not leave an empty or partial file
        # that could be mistaken for a successful one.
        try:
            os.remove(local_path)
        except OSError:
            # Best-effort cleanup: the download error below is the one to report.
            pass
        raise PolyaxonStoresException("Connection error: %s" % e) from e
def download_file(self, blob, local_path, container_name=None, use_basename=True):
    """
    Fetch a single blob from Azure Blob service and write it to a local path.

    Args:
        blob: `str`. name of the blob to download. When `container_name` is not
            given, this value is handed to `parse_wasbs_url` to obtain both
            container and blob.
        local_path: `str`. destination path on the local filesystem.
        container_name: `str`. name of the container that holds the blob.
        use_basename: `bool`. when true, the destination is passed through
            `append_basename` together with the blob name.

    Raises:
        PolyaxonStoresException: if `check_dirname_exists` rejects the destination
            or the connection's `get_blob_to_path` raises an `AzureHttpError`.
    """
    error_template = "Connection error: %s"

    if not container_name:
        container_name, _, blob = self.parse_wasbs_url(blob)

    destination = os.path.abspath(local_path)
    if use_basename:
        destination = append_basename(destination, blob)

    # Validate the destination before issuing any request.
    try:
        check_dirname_exists(destination)
    except PolyaxonPathException as path_error:
        raise PolyaxonStoresException(error_template % path_error) from path_error

    try:
        self.connection.get_blob_to_path(container_name, blob, destination)
    except AzureHttpError as http_error:
        raise PolyaxonStoresException(error_template % http_error) from http_error
def download_dir(
    self, blob, local_path, container_name=None, use_basename=True, workers=0
):
    """
    Download a directory from Azure Blob service.

    Sub-directories (the listing's "prefixes") are downloaded recursively first,
    then the files directly under `blob` are fetched through `download_file`,
    optionally on a pool of worker threads.

    Args:
        blob: `str`. blob to download. When `container_name` is not given, this
            value is handed to `parse_wasbs_url` to obtain container and blob.
        local_path: `str`. the path to download to.
        container_name: `str`. the name of the container.
        use_basename: `bool`. whether or not to use the basename of the key.
        workers: number of workers threads to use for parallel execution.
            The same value is used for every nested directory.

    Raises:
        Whatever `download_file` raises; in parallel mode the error of a failed
        download is re-raised once all submitted downloads have finished.
    """
    if not container_name:
        container_name, _, blob = self.parse_wasbs_url(blob)

    local_path = os.path.abspath(local_path)

    if use_basename:
        local_path = append_basename(local_path, blob)

    try:
        check_dirname_exists(local_path, is_dir=True)
    except PolyaxonPathException:
        # `exist_ok` avoids failing if the directory appears between the check
        # above and this call.
        os.makedirs(local_path, exist_ok=True)

    results = self.list(container_name=container_name, key=blob, delimiter="/")

    # Download sub-directories, forwarding `workers` so nested levels are
    # parallelised the same way as the top level.
    for prefix in sorted(results["prefixes"]):
        self.download_dir(
            blob=os.path.join(blob, prefix),
            local_path=os.path.join(local_path, prefix),
            container_name=container_name,
            use_basename=False,
            workers=workers,
        )

    pool, future_results = self.init_pool(workers)

    # Download files; the first item of each listed entry is used as its name.
    for entry in results["blobs"]:
        file_name = entry[0]
        future_results = self.submit_pool(
            workers=workers,
            pool=pool,
            future_results=future_results,
            fn=self.download_file,
            blob=os.path.join(blob, file_name),
            local_path=os.path.join(local_path, file_name),
            container_name=container_name,
            use_basename=False,
        )

    if workers:
        try:
            done, _ = futures.wait(future_results)
            # `wait` only blocks; calling `result()` is what surfaces an
            # exception raised inside a worker, matching the serial behaviour.
            for future in done:
                future.result()
        finally:
            self.close_pool(pool=pool)
def get_from_path(context_path: str, keys: Union[Set[str], List[str], str]) -> Any:
    """
    Returns a variable from one of the list of keys based on a base path.

    Each key is joined to `context_path` and the first key whose file exists and
    has non-empty content determines the result.

    Args:
        context_path: str, base path where to look for keys.
        keys: a single key, or a list/tuple/set of keys to check under the base
            path. Keys are tried in iteration order (arbitrary for sets).

    Returns:
        `True` / `False` when the file content equals "true" / "false"
        (case-insensitive), the raw file content otherwise, or `None` when no
        key yields a non-empty file.
    """
    # NOTE(review): this relies on `check_dirname_exists` returning a truthy
    # value for an existing directory - confirm against its implementation.
    if not check_dirname_exists(context_path, is_dir=True):
        return None

    keys = keys or []
    # Wrap scalars only: sets are accepted by the signature and must be iterated,
    # not treated as a single key.
    if not isinstance(keys, (list, tuple, set, frozenset)):
        keys = [keys]

    for key in keys:
        key_path = os.path.join(context_path, key)
        if not os.path.exists(key_path):
            # A missing key must not stop the lookup; try the remaining keys.
            continue

        with open(key_path) as f:
            value = f.read()

        if not value:
            continue

        lowered = value.lower()
        if lowered == "true":
            return True
        if lowered == "false":
            return False
        return value

    return None
def download_dir(
    self,
    key: str,
    local_path: str,
    bucket_name: str = None,
    use_basename: bool = True,
    workers: int = 0,
):
    """
    Download a directory from S3.

    Sub-directories (the listing's "prefixes") are downloaded recursively first,
    then the files directly under `key` are fetched through `download_file`,
    optionally on a pool of worker threads.

    Args:
        key: `str`. S3 key that will point to the directory. When `bucket_name`
            is not given, this value is handed to `parse_s3_url` to obtain both
            bucket and key.
        local_path: `str`. the path to download to.
        bucket_name: `str`. Name of the bucket that holds the directory.
        use_basename: `bool`. whether or not to use the basename of the key.
        workers: number of workers threads to use for parallel execution.
            The same value is used for every nested directory.

    Raises:
        Whatever `download_file` raises; in parallel mode the error of a failed
        download is re-raised once all submitted downloads have finished.
    """
    if not bucket_name:
        bucket_name, key = self.parse_s3_url(key)

    local_path = os.path.abspath(local_path)

    if use_basename:
        local_path = append_basename(local_path, key)

    try:
        check_dirname_exists(local_path, is_dir=True)
    except PolyaxonPathException:
        # `exist_ok` avoids failing if the directory appears between the check
        # above and this call.
        os.makedirs(local_path, exist_ok=True)

    results = self.list(bucket_name=bucket_name, prefix=key, delimiter="/")

    # Download sub-directories, forwarding `workers` so nested levels are
    # parallelised the same way as the top level.
    for prefix in sorted(results["prefixes"]):
        self.download_dir(
            key=os.path.join(key, prefix),
            local_path=os.path.join(local_path, prefix),
            bucket_name=bucket_name,
            use_basename=False,
            workers=workers,
        )

    pool, future_results = self.init_pool(workers)

    # Download files; the first item of each listed entry is used as its name.
    for entry in results["keys"]:
        file_name = entry[0]
        future_results = self.submit_pool(
            workers=workers,
            pool=pool,
            future_results=future_results,
            fn=self.download_file,
            key=os.path.join(key, file_name),
            local_path=os.path.join(local_path, file_name),
            bucket_name=bucket_name,
            use_basename=False,
        )

    if workers:
        try:
            done, _ = futures.wait(future_results)
            # `wait` only blocks; calling `result()` is what surfaces an
            # exception raised inside a worker, matching the serial behaviour.
            for future in done:
                future.result()
        finally:
            self.close_pool(pool=pool)