def generate(self):
    """Build a presigned ``get_object`` URL for the object named by ``self.source``.

    ``self.source`` is parsed with ``parse_uri``; its ``bucket_id`` and
    ``key_id`` become the ``Bucket`` and ``Key`` request parameters.

    Returns:
        Whatever ``self.client.generate_presigned_url`` returns.
    """
    location = parse_uri(self.source)
    return self.client.generate_presigned_url(
        "get_object",
        Params={"Bucket": location.bucket_id, "Key": location.key_id},
        # 86400 is one day if the client counts ExpiresIn in seconds
        # (as boto3 does) -- confirm against the client actually in use.
        ExpiresIn=86400,
    )
def parse_uri(path_or_uri):
    """Parse *path_or_uri* with smart_open, falling back to a plain split.

    Returns:
        The object produced by ``smart_open.parse_uri`` when the scheme is
        supported; otherwise a ``Uri(scheme, uri_path)`` namedtuple built by
        splitting on the first ``"://"``.

    Raises:
        NotImplementedError: re-raised from smart_open when the fallback
            split does not yield both a scheme and a path.
    """
    from smart_open import parse_uri as _smart_open_parse

    try:
        return _smart_open_parse(path_or_uri)
    except NotImplementedError:
        # Snakemake sees a lot of URIs which are not supported by smart_open yet
        # "docker", "git+file", "shub", "ncbi","root","roots","rootk", "gsiftp",
        # "srm","ega","ab","dropbox"
        # Fall back to a simple split if we encounter something which isn't supported.
        scheme, _, uri_path = path_or_uri.partition("://")
        if not (scheme and uri_path):
            # Nothing usable on one side of "://": surface the original error.
            raise
        fallback_type = collections.namedtuple("Uri", ["scheme", "uri_path"])
        return fallback_type(scheme, uri_path)
def smart_join(base, path, abspath=False):
    """Join *path* onto *base*, where *base* is a local path or a URI.

    Args:
        base: Local directory or URI prefix.
        path: Component to append.
        abspath: For local bases only, return the absolute form of the result.

    Returns:
        The joined local path, or ``"<scheme>:/<uri_path>"`` for non-local bases.
    """
    if not is_local_file(base):
        from smart_open import parse_uri

        uri = parse_uri("{}/{}".format(base, path))
        uri_path = uri.uri_path
        if not ON_WINDOWS:
            # Norm the path such that it does not contain any ../,
            # which is invalid in an URL.
            assert uri_path[0] == "/"
            uri_path = os.path.normpath(uri_path)
        # Only one slash is written here; the path checked above starts with "/".
        return "{scheme}:/{uri_path}".format(scheme=uri.scheme, uri_path=uri_path)

    joined = os.path.join(base, path)
    return os.path.abspath(joined) if abspath else joined
def __init__(self, file):
    """Record where the input comes from: a Django upload or a URI string.

    Args:
        file: Either an ``UploadedFile`` (kept in ``_django_form_file``) or a
            ``str`` URI (kept in ``_original_file_uri`` and parsed into
            ``_smart_open_uri`` via ``smart_open.parse_uri``).

    Raises:
        ValueError: if *file* is neither an ``UploadedFile`` nor a ``str``.
    """
    # Defaults first, so every attribute exists whichever branch runs below.
    self.temporary_folder = None
    self.file_path = None
    self._is_django_form_file = False
    self._django_form_file = None
    self._original_file_uri = None
    self._smart_open_uri = None

    if isinstance(file, UploadedFile):
        # File provided by django form
        self._django_form_file = file
        self._is_django_form_file = True
    elif isinstance(file, str):
        self._original_file_uri = file
        self._smart_open_uri = smart_open.parse_uri(file)
    else:
        # Same exception type as before, now with a message naming the bad type.
        raise ValueError(
            "file must be an UploadedFile or a URI string, got {}".format(
                type(file).__name__
            )
        )
def _open(path: str, mode: str) -> IO[bytes]:
    """Open *path* in *mode*, using a lightweight writer for S3 binary writes.

    Only ``mode == 'wb'`` on an ``s3://`` path takes the custom route: a boto3
    client (standard retry mode, up to 10 attempts, optional endpoint from the
    ``AWS_ENDPOINT_URL`` environment variable) feeds ``s3.LightweightWriter``,
    wrapped in ``gzip.GzipFile`` when the path ends in ``.gz``. Everything else
    is delegated to ``datawelder.readwrite.open``.
    """
    is_s3_binary_write = mode == 'wb' and path.startswith('s3://')
    if not is_s3_binary_write:
        return datawelder.readwrite.open(path, mode)

    import botocore.config  # type: ignore
    import boto3  # type: ignore
    #
    # We can't do "import datawelder.s3" here because it causes an
    # UnboundLocalError when this function touches datawelder.readwrite
    # (seen on Py3.8.5).
    #
    from datawelder import s3

    #
    # The default S3 writers in smart_open are too memory-hungry, so use
    # a custom implementation here.
    #
    uri = smart_open.parse_uri(path)

    retry_config = botocore.config.Config(retries={
        'mode': 'standard',
        'max_attempts': 10
    })
    client_params = {'config': retry_config}

    # Only pass an endpoint override when the variable is actually set.
    endpoint_url = os.environ.get('AWS_ENDPOINT_URL')
    if endpoint_url is not None:
        client_params['endpoint_url'] = endpoint_url

    writer = s3.LightweightWriter(
        uri.bucket_id,
        uri.key_id,
        min_part_size=datawelder.s3.MIN_MIN_PART_SIZE,
        client=boto3.client('s3', **client_params),
    )
    if not path.endswith('.gz'):
        return writer  # type: ignore
    return gzip.GzipFile(fileobj=writer, mode=mode)  # type: ignore
def parse_s3_url(url):
    """Return ``(bucket_id, key_id)`` as reported by ``parse_uri`` for *url*."""
    parsed = parse_uri(url)
    bucket, key = parsed.bucket_id, parsed.key_id
    return bucket, key
def is_local_file(path_or_uri):
    """Return True when ``parse_uri`` reports the ``"file"`` scheme for *path_or_uri*."""
    scheme = parse_uri(path_or_uri).scheme
    return scheme == "file"
def get_relative_output_path(self):
    """Return the key portion of ``self.job.output_url`` for s3/wasabi URLs.

    Returns:
        ``key_id`` of the parsed URL when its scheme is ``"s3"`` or
        ``"wasabi"``; ``None`` for any other scheme.
    """
    parsed = parse_uri(self.job.output_url)
    if parsed.scheme not in ("s3", "wasabi"):
        # Other schemes have no relative path here; callers receive None.
        return None
    return parsed.key_id
def is_local_file(path_or_uri):
    """Return True when smart_open parses *path_or_uri* with the ``"file"`` scheme."""
    from smart_open import parse_uri

    parsed = parse_uri(path_or_uri)
    return parsed.scheme == "file"
def get(input_url):
    """Pick the input-options object matching the scheme of *input_url*.

    Returns:
        ``S3InputOptions()`` for the ``"s3"`` scheme, ``WasabiInputOptions()``
        for ``"wasabi"``, and ``FileStorageInputOptions()`` for anything else.
    """
    # Parse once and reuse the scheme instead of re-parsing per comparison.
    scheme = parse_uri(input_url).scheme
    if scheme == "s3":
        return S3InputOptions()
    if scheme == "wasabi":
        return WasabiInputOptions()
    return FileStorageInputOptions()
def _copyfunct(self, src_file, dest_file, overwrite=False):
    """Copy *src_file* to *dest_file* with the copier matching their schemes.

    ``self.scopy`` is used when either endpoint parses to the ``s3`` scheme;
    otherwise ``self.gcopy`` is used. The chosen copier's result is returned.
    """
    # Destination is parsed before source, as in the original call order.
    schemes = (parse_uri(dest_file).scheme, parse_uri(src_file).scheme)
    copier = self.scopy if 's3' in schemes else self.gcopy
    return copier(src_file, dest_file, overwrite)
def parse_s3(cls, s3_url):
    """Return ``{'bucket': ..., 'prefix': ...}`` taken from the parsed *s3_url*."""
    parsed = parse_uri(s3_url)
    # NOTE(review): this reads ``bucket`` and ``blob_id`` from the parse result,
    # while other S3 helpers in this file read ``bucket_id`` / ``key_id`` --
    # confirm the ``parse_uri`` in scope here really exposes these attributes.
    return dict(bucket=parsed.bucket, prefix=parsed.blob_id)
def get_file_from_s3(self, url):
    """Fetch the object addressed by *url* and return its ``"Body"`` entry.

    The bucket and key come from ``parse_uri(url)``; the boto3 resource is
    created for the region named by ``settings.AWS_S3_REGION_CODE``.
    """
    location = parse_uri(url)
    s3_resource = boto3.resource("s3", region_name=settings.AWS_S3_REGION_CODE)
    remote_object = s3_resource.Object(location.bucket_id, location.key_id)
    return remote_object.get()["Body"]
def mock_read_file(path, *args, **kwargs):
    """Read a local file standing in for the remote object named by *path*.

    The ``key_id`` of the parsed *path* is used as a local filesystem path.
    Extra positional and keyword arguments are accepted and ignored.

    Returns:
        The file's full text content.
    """
    file_path = parse_uri(path).key_id
    # Context manager so the handle is closed rather than leaked per call.
    with open(file_path) as handle:
        return handle.read()