Beispiel #1
0
 def generate(self):
     """Return a presigned ``get_object`` URL for ``self.source``.

     ``self.source`` is parsed with ``parse_uri``; the resulting
     ``bucket_id`` and ``key_id`` are sent as the ``Bucket`` and ``Key``
     request parameters.  The URL is requested with ``ExpiresIn=86400``.

     NOTE(review): ``self.client`` is presumably a boto3 S3 client, in which
     case 86400 is a lifetime in seconds (24 hours) -- confirm.
     """
     location = parse_uri(self.source)
     request_params = {"Bucket": location.bucket_id, "Key": location.key_id}
     return self.client.generate_presigned_url(
         "get_object",
         Params=request_params,
         ExpiresIn=86400,
     )
Beispiel #2
0
def parse_uri(path_or_uri):
    """Parse *path_or_uri* with smart_open, with a fallback for unknown schemes.

    The value is first handed to ``smart_open.parse_uri``.  If that raises
    ``NotImplementedError``, the string is split on the first ``"://"`` and
    returned as a ``Uri(scheme, uri_path)`` namedtuple.

    Raises:
        NotImplementedError: Re-raised from smart_open when the fallback
            split does not yield both a non-empty scheme and a non-empty path.
    """
    # Aliased so the imported helper is not confused with this function.
    from smart_open import parse_uri as _smart_open_parse_uri

    try:
        return _smart_open_parse_uri(path_or_uri)
    except NotImplementedError as unsupported:
        # Snakemake sees a lot of URIs which are not supported by smart_open yet:
        # "docker", "git+file", "shub", "ncbi", "root", "roots", "rootk",
        # "gsiftp", "srm", "ega", "ab", "dropbox".
        # Fall back to a simple split if we encounter something unsupported.
        scheme, _, uri_path = path_or_uri.partition("://")
        if not (scheme and uri_path):
            raise unsupported
        fallback_uri = collections.namedtuple("Uri", ["scheme", "uri_path"])
        return fallback_uri(scheme, uri_path)
Beispiel #3
0
def smart_join(base, path, abspath=False):
    """Join *path* onto *base*, where *base* is a local path or a URI.

    Local bases (as decided by ``is_local_file``) are joined with
    ``os.path.join`` and made absolute when *abspath* is true.  Any other
    base is joined with a ``/``, parsed with ``smart_open.parse_uri`` and
    rebuilt as ``<scheme>:/<uri_path>``.  *abspath* is ignored for
    non-local bases.
    """
    if not is_local_file(base):
        from smart_open import parse_uri

        uri = parse_uri("{}/{}".format(base, path))
        if ON_WINDOWS:
            # The path is left untouched on Windows.
            uri_path = uri.uri_path
        else:
            # Norm the path such that it does not contain any ../,
            # which is invalid in an URL.
            assert uri.uri_path[0] == "/"
            uri_path = os.path.normpath(uri.uri_path)
        return "{scheme}:/{uri_path}".format(scheme=uri.scheme, uri_path=uri_path)

    joined = os.path.join(base, path)
    return os.path.abspath(joined) if abspath else joined
    def __init__(self, file):
        """Initialise the wrapper from a Django uploaded file or a URI string.

        Args:
            file: Either an ``UploadedFile`` instance (a file provided by a
                Django form) or a ``str`` URI, which is parsed with
                ``smart_open.parse_uri``.

        Raises:
            ValueError: If ``file`` is neither an ``UploadedFile`` nor a
                ``str``.
        """
        self.temporary_folder = None
        self.file_path = None

        # Defaults; exactly one of the two branches below fills in its fields.
        self._is_django_form_file = False
        self._django_form_file = None
        self._original_file_uri = None
        self._smart_open_uri = None

        if isinstance(file, UploadedFile):
            # File provided by django form
            self._django_form_file = file
            self._is_django_form_file = True
        elif isinstance(file, str):
            # _is_django_form_file keeps its default of False.
            self._original_file_uri = file
            self._smart_open_uri = smart_open.parse_uri(file)
        else:
            raise ValueError(
                "file must be an UploadedFile or a str URI, got {}".format(
                    type(file).__name__
                )
            )
Beispiel #5
0
def _open(path: str, mode: str) -> IO[bytes]:
    """Open *path* in *mode*, using a lightweight writer for S3 binary writes.

    Only the combination of ``mode == 'wb'`` and an ``s3://`` path is handled
    here; everything else is delegated to ``datawelder.readwrite.open``.
    For the S3 case a boto3 client is built (honouring the optional
    ``AWS_ENDPOINT_URL`` environment variable) and wrapped in
    ``s3.LightweightWriter``.  Paths ending in ``.gz`` are additionally
    wrapped in a ``gzip.GzipFile``.
    """
    if mode != 'wb' or not path.startswith('s3://'):
        return datawelder.readwrite.open(path, mode)

    import botocore.config  # type: ignore
    import boto3  # type: ignore
    #
    # We can't do "import datawelder.s3" here because it causes an
    # UnboundLocalError when we try to touch datawelder.readwrite elsewhere
    # in this function on Py3.8.5.
    #
    from datawelder import s3
    #
    # The default S3 writers in smart_open are too memory-hungry, so use
    # a custom implementation here.
    #
    uri = smart_open.parse_uri(path)

    retry_policy = {'mode': 'standard', 'max_attempts': 10}
    client_kwargs = {'config': botocore.config.Config(retries=retry_policy)}

    # Only pass endpoint_url when the variable is actually set.
    endpoint_url = os.environ.get('AWS_ENDPOINT_URL')
    if endpoint_url is not None:
        client_kwargs['endpoint_url'] = endpoint_url

    writer = s3.LightweightWriter(
        uri.bucket_id,
        uri.key_id,
        min_part_size=s3.MIN_MIN_PART_SIZE,
        client=boto3.client('s3', **client_kwargs),
    )
    if not path.endswith('.gz'):
        return writer  # type: ignore
    return gzip.GzipFile(fileobj=writer, mode=mode)  # type: ignore
Beispiel #6
0
def parse_s3_url(url):
    """Split *url* into its bucket and key.

    Returns:
        A ``(bucket_id, key_id)`` tuple read from ``parse_uri(url)``.
    """
    parsed = parse_uri(url)
    bucket, key = parsed.bucket_id, parsed.key_id
    return bucket, key
Beispiel #7
0
def is_local_file(path_or_uri):
    """Return True when *path_or_uri* parses to the ``file`` scheme."""
    scheme = parse_uri(path_or_uri).scheme
    return scheme == "file"
 def get_relative_output_path(self):
     """Return the key of the job's output URL for s3/wasabi schemes.

     ``self.job.output_url`` is parsed with ``parse_uri``.  When the scheme
     is ``"s3"`` or ``"wasabi"`` the parsed ``key_id`` is returned; for any
     other scheme the result is ``None``.
     """
     output_uri = parse_uri(self.job.output_url)
     if output_uri.scheme not in ("s3", "wasabi"):
         return None
     return output_uri.key_id
Beispiel #9
0
def is_local_file(path_or_uri):
    """Return True when smart_open parses *path_or_uri* to the ``file`` scheme."""
    from smart_open import parse_uri as _parse

    parsed = _parse(path_or_uri)
    return parsed.scheme == "file"
Beispiel #10
0
 def get(input_url):
     """Return the input-options object matching the scheme of *input_url*.

     Args:
         input_url: URL or path, parsed with ``parse_uri``.

     Returns:
         ``S3InputOptions()`` for the ``"s3"`` scheme,
         ``WasabiInputOptions()`` for the ``"wasabi"`` scheme, and
         ``FileStorageInputOptions()`` for anything else.
     """
     # Parse once and reuse the scheme for both comparisons.
     scheme = parse_uri(input_url).scheme
     if scheme == "s3":
         return S3InputOptions()
     if scheme == "wasabi":
         return WasabiInputOptions()
     return FileStorageInputOptions()
Beispiel #11
0
 def _copyfunct(self, src_file, dest_file, overwrite=False):
     """Copy *src_file* to *dest_file* using the scheme-appropriate copier.

     Both locations are parsed with ``parse_uri``.  If either one has the
     ``s3`` scheme the call is forwarded to ``self.scopy``; otherwise it is
     forwarded to ``self.gcopy``.  The chosen copier's result is returned.
     """
     dest_scheme = parse_uri(dest_file).scheme
     src_scheme = parse_uri(src_file).scheme
     involves_s3 = dest_scheme == 's3' or src_scheme == 's3'
     copier = self.scopy if involves_s3 else self.gcopy
     return copier(src_file, dest_file, overwrite)
Beispiel #12
0
 def parse_s3(cls, s3_url):
     """Return the bucket and prefix of *s3_url* as a dict.

     The URL is parsed with ``parse_uri``; the parsed ``bucket`` attribute
     is returned under ``'bucket'`` and ``blob_id`` under ``'prefix'``.
     """
     parsed = parse_uri(s3_url)
     bucket, prefix = parsed.bucket, parsed.blob_id
     return {'bucket': bucket, 'prefix': prefix}
Beispiel #13
0
 def get_file_from_s3(self, url):
     """Fetch the S3 object addressed by *url* and return its ``Body``.

     *url* is parsed with ``parse_uri`` to obtain ``bucket_id`` and
     ``key_id``.  The object is fetched through a boto3 S3 resource created
     for the region ``settings.AWS_S3_REGION_CODE``.
     """
     location = parse_uri(url)
     s3_resource = boto3.resource("s3", region_name=settings.AWS_S3_REGION_CODE)
     s3_object = s3_resource.Object(location.bucket_id, location.key_id)
     return s3_object.get()["Body"]
Beispiel #14
0
def mock_read_file(path, *args, **kwargs):
    """Read a local file named by the ``key_id`` of *path* and return its text.

    *path* is parsed with ``parse_uri`` and the resulting ``key_id`` is
    opened as a local file path.  Extra positional and keyword arguments
    are accepted and ignored.

    Returns:
        The full contents of the file, read in text mode.
    """
    file_path = parse_uri(path).key_id
    # Use a context manager so the handle is closed deterministically.
    with open(file_path) as handle:
        return handle.read()