def open(
    bucket_id,
    key_id,
    mode,
    version_id=None,
    buffer_size=DEFAULT_BUFFER_SIZE,
    min_part_size=DEFAULT_MIN_PART_SIZE,
    multipart_upload=True,
    session=None,
    defer_seek=False,
    client=None,
    client_kwargs=None,
    writebuffer=None,
):
    """Open an S3 object on Wasabi via smart_open's s3 transport.

    Delegates to ``s3.open``, injecting the Wasabi endpoint through
    ``resource_kwargs``.

    NOTE(review): ``defer_seek``, ``client``, ``client_kwargs`` and
    ``writebuffer`` are accepted for signature compatibility but are
    not forwarded to ``s3.open`` — presumably the underlying transport
    predates them; confirm before relying on them.
    """
    wasabi_resource_kwargs = {"endpoint_url": settings.WASABI_S3_ENDPOINT_URL}
    return s3.open(
        bucket_id,
        key_id,
        mode,
        version_id,
        buffer_size,
        min_part_size,
        multipart_upload_kwargs=None,
        multipart_upload=multipart_upload,
        session=session,
        resource_kwargs=wasabi_resource_kwargs,
        singlepart_upload_kwargs=None,
        object_kwargs=None,
    )
def s3_open_key(key, mode, **kwargs):
    """Open a boto S3 key as a decompressed, decoded file-like object.

    :param key: a boto key object (exposes ``.bucket.name`` and ``.name``).
    :param str mode: one of the ``smart_open_s3`` READ/WRITE modes.
    :param kwargs: forwarded to ``smart_open_s3.open``; ``host`` and
        ``ignore_extension`` are consumed here.
    :returns: a file-like object layered codec -> encoding over the raw stream.
    :raises NotImplementedError: for any mode other than read/write.
    """
    logger.debug('%r', locals())
    #
    # TODO: handle boto3 keys as well
    #
    host = kwargs.pop('host', None)
    if host is not None:
        kwargs['endpoint_url'] = 'http://' + host

    if kwargs.pop("ignore_extension", False):
        codec = None
    else:
        codec = _detect_codec(key.name)

    #
    # Codecs work on a byte-level, so the underlying S3 object should
    # always be reading bytes.
    #
    if mode in (smart_open_s3.READ, smart_open_s3.READ_BINARY):
        s3_mode = smart_open_s3.READ_BINARY
    elif mode in (smart_open_s3.WRITE, smart_open_s3.WRITE_BINARY):
        s3_mode = smart_open_s3.WRITE_BINARY
    else:
        raise NotImplementedError('mode %r not implemented for S3' % mode)

    # Fix: use the module-level logger, consistently with the rest of this
    # function.  The original called logging.debug, which goes through the
    # root logger and bypasses this module's logging configuration.
    logger.debug('codec: %r mode: %r s3_mode: %r', codec, mode, s3_mode)
    # NOTE(review): encoding/errors are read with .get, so they stay in
    # kwargs and are also passed to smart_open_s3.open below — verify the
    # transport accepts (or ignores) them.
    encoding = kwargs.get('encoding')
    errors = kwargs.get('errors', DEFAULT_ERRORS)

    fobj = smart_open_s3.open(key.bucket.name, key.name, s3_mode, **kwargs)
    decompressed_fobj = _CODECS[codec](fobj, mode)
    decoded_fobj = encoding_wrapper(
        decompressed_fobj, mode, encoding=encoding, errors=errors)
    return decoded_fobj
def _s3_open_uri(parsed_uri, mode, transport_params):
    """Open a binary S3 stream for a parsed ``s3://`` URI.

    :param parsed_uri: parsed URI namedtuple (bucket_id, key_id, access
        credentials, ...).
    :param str mode: must be 'rb' or 'wb'.
    :param dict transport_params: passed through to ``smart_open_s3.open``
        after filtering via ``_check_kwargs``.
    :raises ValueError: for text modes 'r'/'w'.
    :raises NotImplementedError: for any other unsupported mode.
    """
    logger.debug('s3_open_uri: %r', locals())
    if mode in ('r', 'w'):
        raise ValueError('this function can only open binary streams. '
                         'Use smart_open.smart_open() to open text streams.')
    elif mode not in ('rb', 'wb'):
        # Fix: the original passed `mode` as a second argument to
        # NotImplementedError instead of %-formatting it into the message,
        # so the placeholder was never filled in.
        raise NotImplementedError('unsupported mode: %r' % mode)

    #
    # There are two explicit ways we can receive session parameters from the user.
    #
    # 1. Via the session keyword argument (transport_params)
    # 2. Via the URI itself
    #
    # They are not mutually exclusive, but we have to pick one of the two.
    # Go with 1).
    #
    if transport_params.get('session') is not None and (
            parsed_uri.access_id or parsed_uri.access_secret):
        logger.warning(
            'ignoring credentials parsed from URL because they conflict with '
            'transport_params.session. Set transport_params.session to None '
            'to suppress this warning.')
    elif (parsed_uri.access_id and parsed_uri.access_secret):
        transport_params['session'] = boto3.Session(
            aws_access_key_id=parsed_uri.access_id,
            aws_secret_access_key=parsed_uri.access_secret,
        )

    # Drop any transport params that smart_open_s3.open does not accept.
    kwargs = _check_kwargs(smart_open_s3.open, transport_params)
    return smart_open_s3.open(parsed_uri.bucket_id, parsed_uri.key_id, mode, **kwargs)
def s3_open_key(key, mode, **kwargs):
    """Open a boto S3 key, wrapping the raw stream with a codec if one
    is detected from the key's filename extension.

    :param key: a boto key object (exposes ``.bucket.name`` and ``.name``).
    :param str mode: open mode; coerced to binary only when a codec applies.
    :param kwargs: forwarded to ``smart_open_s3.open``; ``host`` and
        ``ignore_extension`` are consumed here.
    :returns: the (possibly codec-wrapped) file-like object.
    """
    logger.debug('%r', locals())
    #
    # TODO: handle boto3 keys as well
    #
    host = kwargs.pop('host', None)
    if host is not None:
        kwargs['endpoint_url'] = 'http://' + host

    if kwargs.pop("ignore_extension", False):
        codec = None
    else:
        codec = _detect_codec(key.name)

    #
    # Codecs work on a byte-level, so the underlying S3 object should
    # always be reading bytes.
    #
    if codec and mode in (smart_open_s3.READ, smart_open_s3.READ_BINARY):
        s3_mode = smart_open_s3.READ_BINARY
    elif codec and mode in (smart_open_s3.WRITE, smart_open_s3.WRITE_BINARY):
        s3_mode = smart_open_s3.WRITE_BINARY
    else:
        s3_mode = mode

    # Fix: use the module-level logger, consistently with the rest of this
    # function.  The original called logging.debug, which goes through the
    # root logger and bypasses this module's logging configuration.
    logger.debug('codec: %r mode: %r s3_mode: %r', codec, mode, s3_mode)
    fobj = smart_open_s3.open(key.bucket.name, key.name, s3_mode, **kwargs)
    return _CODECS[codec](fobj, mode)
def s3_open_uri(parsed_uri, mode, **kwargs):
    """Open the S3 object named by *parsed_uri*, layering decompression
    and text decoding on top of the raw binary stream.

    Credentials embedded in the URI, when present, are copied into the
    boto keyword arguments before opening.
    """
    logger.debug('%r', locals())

    if parsed_uri.access_id is not None:
        kwargs['aws_access_key_id'] = parsed_uri.access_id
    if parsed_uri.access_secret is not None:
        kwargs['aws_secret_access_key'] = parsed_uri.access_secret

    # Get an S3 host. It is required for sigv4 operations.
    endpoint_host = kwargs.pop('host', None)
    if endpoint_host is not None:
        kwargs['endpoint_url'] = 'http://' + endpoint_host

    #
    # TODO: this is the wrong place to handle ignore_extension.
    # It should happen at the highest level in the smart_open function, because
    # it influences other file systems as well, not just S3.
    #
    skip_detection = kwargs.pop("ignore_extension", False)
    codec = None if skip_detection else _detect_codec(parsed_uri.key_id)

    #
    # Codecs work on a byte-level, so the underlying S3 object should
    # always be reading bytes.
    #
    if mode in (smart_open_s3.READ, smart_open_s3.READ_BINARY):
        binary_mode = smart_open_s3.READ_BINARY
    elif mode in (smart_open_s3.WRITE, smart_open_s3.WRITE_BINARY):
        binary_mode = smart_open_s3.WRITE_BINARY
    else:
        raise NotImplementedError('mode %r not implemented for S3' % mode)

    #
    # TODO: I'm not sure how to handle this with boto3. Any ideas?
    #
    # https://github.com/boto/boto3/issues/334
    #
    # _setup_unsecured_mode()

    encoding = kwargs.get('encoding')
    errors = kwargs.get('errors', DEFAULT_ERRORS)

    raw_stream = smart_open_s3.open(
        parsed_uri.bucket_id, parsed_uri.key_id, binary_mode, **kwargs)
    decompressed = _CODECS[codec](raw_stream, mode)
    return encoding_wrapper(decompressed, mode, encoding=encoding, errors=errors)
def _s3_open_uri(parsed_uri, mode, transport_params):
    """Open a binary S3 stream for a parsed URI, honoring credentials and
    an endpoint override parsed from the URI itself.

    :param parsed_uri: parsed URI namedtuple (bucket_id, key_id, host,
        port, access credentials, ...).
    :param str mode: must be 'rb' or 'wb'.
    :param dict transport_params: passed through to ``smart_open_s3.open``
        after filtering via ``_check_kwargs``.
    :raises ValueError: for text modes 'r'/'w'.
    :raises NotImplementedError: for any other unsupported mode.
    """
    logger.debug('s3_open_uri: %r', locals())
    if mode in ('r', 'w'):
        raise ValueError('this function can only open binary streams. '
                         'Use smart_open.smart_open() to open text streams.')
    elif mode not in ('rb', 'wb'):
        # Fix: the original passed `mode` as a second argument to
        # NotImplementedError instead of %-formatting it into the message,
        # so the placeholder was never filled in.
        raise NotImplementedError('unsupported mode: %r' % mode)

    #
    # There are two explicit ways we can receive session parameters from the user.
    #
    # 1. Via the session keyword argument (transport_params)
    # 2. Via the URI itself
    #
    # They are not mutually exclusive, but we have to pick one of the two.
    # Go with 1).
    #
    if transport_params.get('session') is not None and (
            parsed_uri.access_id or parsed_uri.access_secret):
        logger.warning(
            'ignoring credentials parsed from URL because they conflict with '
            'transport_params.session. Set transport_params.session to None '
            'to suppress this warning.')
    elif (parsed_uri.access_id and parsed_uri.access_secret):
        transport_params['session'] = boto3.Session(
            aws_access_key_id=parsed_uri.access_id,
            aws_secret_access_key=parsed_uri.access_secret,
        )

    #
    # There are two explicit ways the user can provide the endpoint URI:
    #
    # 1. Via the URL. The protocol is implicit, and we assume HTTPS in this case.
    # 2. Via the resource_kwargs and multipart_upload_kwargs endpoint_url parameter.
    #
    # Again, these are not mutually exclusive: the user can specify both. We
    # have to pick one to proceed, however, and we go with 2.
    #
    if parsed_uri.host != _DEFAULT_S3_HOST:
        endpoint_url = 'https://%s:%d' % (parsed_uri.host, parsed_uri.port)
        _override_endpoint_url(transport_params, endpoint_url)

    # Drop any transport params that smart_open_s3.open does not accept.
    kwargs = _check_kwargs(smart_open_s3.open, transport_params)
    return smart_open_s3.open(parsed_uri.bucket_id, parsed_uri.key_id, mode, **kwargs)
def _s3_open_uri(parsed_uri, mode, **kwargs):
    """Open a binary S3 stream for a parsed URI.

    :param parsed_uri: parsed URI namedtuple (bucket_id, key_id, access
        credentials, ...).
    :param str mode: must be 'rb' or 'wb'.
    :param kwargs: forwarded to ``smart_open_s3.open``; ``host`` is
        consumed here and converted to an ``endpoint_url``.
    :raises ValueError: for text modes 'r'/'w'.
    :raises NotImplementedError: for any other unsupported mode.
    """
    logger.debug('s3_open_uri: %r', locals())
    if mode in ('r', 'w'):
        raise ValueError('this function can only open binary streams. '
                         'Use smart_open.smart_open() to open text streams.')
    elif mode not in ('rb', 'wb'):
        # Fix: the original passed `mode` as a second argument to
        # NotImplementedError instead of %-formatting it into the message,
        # so the placeholder was never filled in.
        raise NotImplementedError('unsupported mode: %r' % mode)

    if parsed_uri.access_id is not None:
        kwargs['aws_access_key_id'] = parsed_uri.access_id
    if parsed_uri.access_secret is not None:
        kwargs['aws_secret_access_key'] = parsed_uri.access_secret

    # Get an S3 host. It is required for sigv4 operations.
    host = kwargs.pop('host', None)
    if host is not None:
        kwargs['endpoint_url'] = _add_scheme_to_host(host)

    return smart_open_s3.open(parsed_uri.bucket_id, parsed_uri.key_id, mode, **kwargs)
def _s3_open_uri(parsed_uri, mode, **kwargs):
    """Open a binary S3 stream for a parsed URI.

    :param parsed_uri: parsed URI namedtuple (bucket_id, key_id, access
        credentials, ...).
    :param str mode: must be 'rb' or 'wb'.
    :param kwargs: forwarded to ``smart_open_s3.open``; ``host`` is
        consumed here and converted to an HTTP ``endpoint_url``.
    :raises ValueError: for text modes 'r'/'w'.
    :raises NotImplementedError: for any other unsupported mode.
    """
    logger.debug('s3_open_uri: %r', locals())
    if mode in ('r', 'w'):
        raise ValueError('this function can only open binary streams. '
                         'Use smart_open.smart_open() to open text streams.')
    elif mode not in ('rb', 'wb'):
        # Fix: the original passed `mode` as a second argument to
        # NotImplementedError instead of %-formatting it into the message,
        # so the placeholder was never filled in.
        raise NotImplementedError('unsupported mode: %r' % mode)

    if parsed_uri.access_id is not None:
        kwargs['aws_access_key_id'] = parsed_uri.access_id
    if parsed_uri.access_secret is not None:
        kwargs['aws_secret_access_key'] = parsed_uri.access_secret

    # Get an S3 host. It is required for sigv4 operations.
    host = kwargs.pop('host', None)
    if host is not None:
        kwargs['endpoint_url'] = 'http://' + host

    return smart_open_s3.open(parsed_uri.bucket_id, parsed_uri.key_id, mode, **kwargs)
def s3_open_uri(parsed_uri, mode, ignore_extension=False, **kwargs):
    """Open the S3 object named by *parsed_uri*, layering compression and
    text-encoding wrappers on top of the raw binary stream.

    Credentials embedded in the URI, when present, are copied into the
    boto keyword arguments before opening.
    """
    logger.debug('%r', locals())

    if parsed_uri.access_id is not None:
        kwargs['aws_access_key_id'] = parsed_uri.access_id
    if parsed_uri.access_secret is not None:
        kwargs['aws_secret_access_key'] = parsed_uri.access_secret

    # Get an S3 host. It is required for sigv4 operations.
    endpoint_host = kwargs.pop('host', None)
    if endpoint_host is not None:
        kwargs['endpoint_url'] = 'http://' + endpoint_host

    #
    # Codecs work on a byte-level, so the underlying S3 object should
    # always be reading bytes.
    #
    if mode in (smart_open_s3.READ, smart_open_s3.READ_BINARY):
        binary_mode = smart_open_s3.READ_BINARY
    elif mode in (smart_open_s3.WRITE, smart_open_s3.WRITE_BINARY):
        binary_mode = smart_open_s3.WRITE_BINARY
    else:
        raise NotImplementedError('mode %r not implemented for S3' % mode)

    #
    # TODO: I'm not sure how to handle this with boto3. Any ideas?
    #
    # https://github.com/boto/boto3/issues/334
    #
    # _setup_unsecured_mode()

    encoding = kwargs.get('encoding')
    errors = kwargs.get('errors', DEFAULT_ERRORS)

    raw_stream = smart_open_s3.open(
        parsed_uri.bucket_id, parsed_uri.key_id, binary_mode, **kwargs)
    decompressed = compression_wrapper(
        raw_stream, parsed_uri.key_id, mode, ignore_extension)
    return encoding_wrapper(decompressed, mode, encoding=encoding, errors=errors)
def _open_binary_stream(uri, mode, **kw):
    """Open an arbitrary URI in the specified binary mode.

    Not all modes are supported for all protocols.

    :arg uri: The URI to open. May be a string, or something else.
    :arg str mode: The mode to open with. Must be rb, wb or ab.
    :arg kw: TODO: document this.
    :returns: A file object and the filename
    :rtype: tuple
    """
    if mode not in ('rb', 'rb+', 'wb', 'wb+', 'ab', 'ab+'):
        #
        # This should really be a ValueError, but for the sake of compatibility
        # with older versions, which raise NotImplementedError, we do the same.
        #
        raise NotImplementedError('unsupported mode: %r' % mode)

    if isinstance(uri, six.string_types):
        # this method just routes the request to classes handling the specific storage
        # schemes, depending on the URI protocol in `uri`
        filename = uri.split('/')[-1]
        parsed_uri = _parse_uri(uri)
        unsupported = "%r mode not supported for %r scheme" % (mode, parsed_uri.scheme)

        if parsed_uri.scheme in ("file", ):
            # local files -- both read & write supported
            # compression, if any, is determined by the filename extension (.gz, .bz2, .xz)
            fobj = io.open(parsed_uri.uri_path, mode)
            return fobj, filename
        elif parsed_uri.scheme in smart_open_s3.SUPPORTED_SCHEMES:
            return _s3_open_uri(parsed_uri, mode, **kw), filename
        elif parsed_uri.scheme in ("hdfs", ):
            if mode == 'rb':
                return smart_open_hdfs.CliRawInputBase(parsed_uri.uri_path), filename
            elif mode == 'wb':
                return smart_open_hdfs.CliRawOutputBase(parsed_uri.uri_path), filename
            else:
                raise NotImplementedError(unsupported)
        elif parsed_uri.scheme in ("webhdfs", ):
            if mode == 'rb':
                fobj = smart_open_webhdfs.BufferedInputBase(parsed_uri.uri_path, **kw)
            elif mode == 'wb':
                fobj = smart_open_webhdfs.BufferedOutputBase(parsed_uri.uri_path, **kw)
            else:
                raise NotImplementedError(unsupported)
            return fobj, filename
        elif parsed_uri.scheme.startswith('http'):
            #
            # The URI may contain a query string and fragments, which interfere
            # with our compressed/uncompressed estimation.
            #
            filename = P.basename(urlparse.urlparse(uri).path)
            if mode == 'rb':
                return smart_open_http.BufferedInputBase(uri, **kw), filename
            else:
                raise NotImplementedError(unsupported)
        else:
            # Fix: the original passed the scheme as a second argument to
            # NotImplementedError instead of %-formatting it into the message,
            # so the placeholder was never filled in.
            raise NotImplementedError("scheme %r is not supported" % parsed_uri.scheme)
    elif isinstance(uri, boto.s3.key.Key):
        logger.debug('%r', locals())
        #
        # TODO: handle boto3 keys as well
        #
        host = kw.pop('host', None)
        if host is not None:
            kw['endpoint_url'] = _add_scheme_to_host(host)
        return smart_open_s3.open(uri.bucket.name, uri.name, mode, **kw), uri.name
    elif hasattr(uri, 'read'):
        # simply pass-through if already a file-like
        filename = '/tmp/unknown'
        return uri, filename
    else:
        raise TypeError('don\'t know how to handle uri %s' % repr(uri))
def _open_binary_stream(uri, mode, **kw):
    """Open an arbitrary URI in the specified binary mode.

    Not all modes are supported for all protocols.

    :arg uri: The URI to open. May be a string, or something else.
    :arg str mode: The mode to open with. Must be rb, wb or ab.
    :arg kw: TODO: document this.
    :returns: A file object and the filename
    :rtype: tuple
    """
    if mode not in ('rb', 'rb+', 'wb', 'wb+', 'ab', 'ab+'):
        #
        # This should really be a ValueError, but for the sake of compatibility
        # with older versions, which raise NotImplementedError, we do the same.
        #
        raise NotImplementedError('unsupported mode: %r' % mode)

    if isinstance(uri, six.string_types):
        # this method just routes the request to classes handling the specific storage
        # schemes, depending on the URI protocol in `uri`
        filename = uri.split('/')[-1]
        parsed_uri = _parse_uri(uri)
        unsupported = "%r mode not supported for %r scheme" % (mode, parsed_uri.scheme)

        if parsed_uri.scheme in ("file", ):
            # local files -- both read & write supported
            # compression, if any, is determined by the filename extension (.gz, .bz2)
            fobj = io.open(parsed_uri.uri_path, mode)
            return fobj, filename
        elif parsed_uri.scheme in ("s3", "s3n", 's3u'):
            return _s3_open_uri(parsed_uri, mode, **kw), filename
        elif parsed_uri.scheme in ("hdfs", ):
            if mode == 'rb':
                return smart_open_hdfs.CliRawInputBase(parsed_uri.uri_path), filename
            elif mode == 'wb':
                return smart_open_hdfs.CliRawOutputBase(parsed_uri.uri_path), filename
            else:
                raise NotImplementedError(unsupported)
        elif parsed_uri.scheme in ("webhdfs", ):
            if mode == 'rb':
                fobj = smart_open_webhdfs.BufferedInputBase(parsed_uri.uri_path, **kw)
            elif mode == 'wb':
                fobj = smart_open_webhdfs.BufferedOutputBase(parsed_uri.uri_path, **kw)
            else:
                raise NotImplementedError(unsupported)
            return fobj, filename
        elif parsed_uri.scheme.startswith('http'):
            #
            # The URI may contain a query string and fragments, which interfere
            # with our compressed/uncompressed estimation.
            #
            filename = P.basename(urlparse.urlparse(uri).path)
            if mode == 'rb':
                return smart_open_http.BufferedInputBase(uri, **kw), filename
            else:
                raise NotImplementedError(unsupported)
        else:
            # Fix: the original passed the scheme as a second argument to
            # NotImplementedError instead of %-formatting it into the message,
            # so the placeholder was never filled in.
            raise NotImplementedError("scheme %r is not supported" % parsed_uri.scheme)
    elif isinstance(uri, boto.s3.key.Key):
        logger.debug('%r', locals())
        #
        # TODO: handle boto3 keys as well
        #
        host = kw.pop('host', None)
        if host is not None:
            kw['endpoint_url'] = 'http://' + host
        return smart_open_s3.open(uri.bucket.name, uri.name, mode, **kw), uri.name
    elif hasattr(uri, 'read'):
        # simply pass-through if already a file-like
        filename = '/tmp/unknown'
        return uri, filename
    else:
        raise TypeError('don\'t know how to handle uri %s' % repr(uri))