def _my_urlsplit(url): """This is a hack to prevent the regular urlsplit from splitting around question marks. A question mark (?) in a URL typically indicates the start of a querystring, and the standard library's urlparse function handles the querystring separately. Unfortunately, question marks can also appear _inside_ the actual URL for some schemas like S3. Replaces question marks with newlines prior to splitting. This is safe because: 1. The standard library's urlsplit completely ignores newlines 2. Raw newlines will never occur in innocuous URLs. They are always URL-encoded. See Also -------- https://github.com/python/cpython/blob/3.7/Lib/urllib/parse.py https://github.com/RaRe-Technologies/smart_open/issues/285 """ if '?' not in url: return urlsplit(url, allow_fragments=False) sr = urlsplit(url.replace('?', '\n'), allow_fragments=False) SplitResult = collections.namedtuple('SplitResult', 'scheme netloc path query fragment') return SplitResult(sr.scheme, sr.netloc, sr.path.replace('\n', '?'), '', '')
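
# A hedged demo of the behaviour described above; this helper is not part of
# the original module.  Note the docstring's premise holds for the CPython 3.7
# parser it links to; later CPython security releases (bpo-43882) strip raw
# newlines and tabs inside urlsplit, which defeats this trick.
def _demo_my_urlsplit():
    sr = _my_urlsplit('s3://mybucket/my?key')
    assert sr.scheme == 's3' and sr.netloc == 'mybucket'
    # With the CPython 3.7 behaviour linked above, the '?' survives in the path:
    #     sr.path == '/my?key'  and  sr.query == ''

    # URLs without '?' fall through to the standard urlsplit:
    sr = _my_urlsplit('s3://mybucket/mykey')
    assert sr.path == '/mykey'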
def _parse_uri(uri_as_string): """ Parse the given URI from a string. Supported URI schemes are: * file * hdfs * http * https * s3 * s3a * s3n * s3u * webhdfs .s3, s3a and s3n are treated the same way. s3u is s3 but without SSL. Valid URI examples:: * s3://my_bucket/my_key * s3://my_key:my_secret@my_bucket/my_key * s3://my_key:my_secret@my_server:my_port@my_bucket/my_key * hdfs:///path/file * hdfs://path/file * webhdfs://host:port/path/file * ./local/path/file * ~/local/path/file * local/path/file * ./local/path/file.gz * file:///home/user/file * file:///home/user/file.bz2 * [ssh|scp|sftp]://username@host//path/file * [ssh|scp|sftp]://username@host/path/file """ if os.name == 'nt': # urlsplit doesn't work on Windows -- it parses the drive as the scheme... if '://' not in uri_as_string: # no protocol given => assume a local file uri_as_string = 'file://' + uri_as_string parsed_uri = urlsplit(uri_as_string, allow_fragments=False) if parsed_uri.scheme == "hdfs": return _parse_uri_hdfs(parsed_uri) elif parsed_uri.scheme == "webhdfs": return _parse_uri_webhdfs(parsed_uri) elif parsed_uri.scheme in smart_open_s3.SUPPORTED_SCHEMES: return _parse_uri_s3x(parsed_uri) elif parsed_uri.scheme == 'file': return _parse_uri_file(parsed_uri.netloc + parsed_uri.path) elif parsed_uri.scheme in ('', None): return _parse_uri_file(uri_as_string) elif parsed_uri.scheme.startswith('http'): return Uri(scheme=parsed_uri.scheme, uri_path=uri_as_string) elif parsed_uri.scheme in smart_open_ssh.SCHEMES: return _parse_uri_ssh(parsed_uri) else: raise NotImplementedError("unknown URI scheme %r in %r" % (parsed_uri.scheme, uri_as_string))
def __init__(self, uri, default_scheme="file"):
    """
    Assume `default_scheme` if no scheme given in `uri`.
    """
    if os.name == 'nt':
        # urlsplit doesn't work on Windows -- it parses the drive as the scheme...
        if '://' not in uri:
            # no protocol given => assume a local file
            uri = 'file://' + uri

    parsed_uri = urlsplit(uri)
    self.scheme = parsed_uri.scheme if parsed_uri.scheme else default_scheme

    if self.scheme == "hdfs":
        self.uri_path = parsed_uri.netloc + parsed_uri.path
        self.uri_path = "/" + self.uri_path.lstrip("/")
        if not self.uri_path:
            raise RuntimeError("invalid HDFS URI: %s" % uri)
    elif self.scheme == "webhdfs":
        self.uri_path = parsed_uri.netloc + "/webhdfs/v1" + parsed_uri.path
        if parsed_uri.query:
            self.uri_path += "?" + parsed_uri.query
        if not self.uri_path:
            raise RuntimeError("invalid WebHDFS URI: %s" % uri)
    elif self.scheme in ("s3", "s3n"):
        self.bucket_id = (parsed_uri.netloc + parsed_uri.path).split('@')
        self.key_id = None

        if len(self.bucket_id) == 1:
            # URI without credentials: s3://bucket/object
            self.bucket_id, self.key_id = self.bucket_id[0].split('/', 1)
            # "None" credentials are interpreted as "look for credentials in other locations" by boto
            self.access_id, self.access_secret = None, None
        elif len(self.bucket_id) == 2 and len(self.bucket_id[0].split(':')) == 2:
            # URI in full format: s3://key:secret@bucket/object
            # access key id: [A-Z0-9]{20}
            # secret access key: [A-Za-z0-9/+=]{40}
            acc, self.bucket_id = self.bucket_id
            self.access_id, self.access_secret = acc.split(':')
            self.bucket_id, self.key_id = self.bucket_id.split('/', 1)
        else:
            # more than 1 '@' means invalid uri
            # Bucket names must be at least 3 and no more than 63 characters long.
            # Bucket names must be a series of one or more labels.
            # Adjacent labels are separated by a single period (.).
            # Bucket names can contain lowercase letters, numbers, and hyphens.
            # Each label must start and end with a lowercase letter or a number.
            raise RuntimeError("invalid S3 URI: %s" % uri)
    elif self.scheme == 'file':
        self.uri_path = parsed_uri.netloc + parsed_uri.path

        # '~/tmp' may be expanded to '/Users/username/tmp'
        self.uri_path = os.path.expanduser(self.uri_path)

        if not self.uri_path:
            raise RuntimeError("invalid file URI: %s" % uri)
    else:
        raise NotImplementedError("unknown URI scheme %r in %r" % (self.scheme, uri))
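
# Hedged example of the two credential forms accepted by the __init__ above.
# "ParseUri" is an illustrative name for the enclosing class:
#
#     p = ParseUri('s3://my_key:my_secret@my_bucket/my_object')
#     # p.access_id == 'my_key'; p.access_secret == 'my_secret'
#     # p.bucket_id == 'my_bucket'; p.key_id == 'my_object'
#
#     p = ParseUri('s3://my_bucket/my_object')
#     # p.access_id and p.access_secret are None, so boto looks up
#     # credentials in its usual locations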
def __init__(self, uri, default_scheme="file"):
    """
    Assume `default_scheme` if no scheme given in `uri`.
    """
    if os.name == 'nt':
        # urlsplit doesn't work on Windows -- it parses the drive as the scheme...
        if '://' not in uri:
            # no protocol given => assume a local file
            uri = 'file://' + uri

    parsed_uri = urlsplit(uri)
    self.scheme = parsed_uri.scheme if parsed_uri.scheme else default_scheme

    if self.scheme == "hdfs":
        self.uri_path = parsed_uri.netloc + parsed_uri.path
        self.uri_path = "/" + self.uri_path.lstrip("/")
        if not self.uri_path:
            raise RuntimeError("invalid HDFS URI: %s" % uri)
    elif self.scheme == "webhdfs":
        self.uri_path = parsed_uri.netloc + "/webhdfs/v1" + parsed_uri.path
        if not self.uri_path:
            raise RuntimeError("invalid WebHDFS URI: %s" % uri)
    elif self.scheme in ("s3", "s3n"):
        self.bucket_id = (parsed_uri.netloc + parsed_uri.path).split('@')
        self.key_id = None

        if len(self.bucket_id) == 1:
            # URI without credentials: s3://bucket/object
            self.bucket_id, self.key_id = self.bucket_id[0].split('/', 1)
            # "None" credentials are interpreted as "look for credentials in other locations" by boto
            self.access_id, self.access_secret = None, None
        elif len(self.bucket_id) == 2 and len(self.bucket_id[0].split(':')) == 2:
            # URI in full format: s3://key:secret@bucket/object
            # access key id: [A-Z0-9]{20}
            # secret access key: [A-Za-z0-9/+=]{40}
            acc, self.bucket_id = self.bucket_id
            self.access_id, self.access_secret = acc.split(':')
            self.bucket_id, self.key_id = self.bucket_id.split('/', 1)
        else:
            # more than 1 '@' means invalid uri
            # Bucket names must be at least 3 and no more than 63 characters long.
            # Bucket names must be a series of one or more labels.
            # Adjacent labels are separated by a single period (.).
            # Bucket names can contain lowercase letters, numbers, and hyphens.
            # Each label must start and end with a lowercase letter or a number.
            raise RuntimeError("invalid S3 URI: %s" % uri)
    elif self.scheme == 'file':
        self.uri_path = parsed_uri.netloc + parsed_uri.path
        if not self.uri_path:
            raise RuntimeError("invalid file URI: %s" % uri)
    else:
        raise NotImplementedError("unknown URI scheme %r in %r" % (self.scheme, uri))
def read_sample_csv(filename_to_read, inConn):
    """Read a CSV file from the object store into a DataFrame.

    `pn` is this module's pandas alias; `get_objectstore_conn()` (defined
    elsewhere) returns a boto-style connection when none is passed in.
    """
    splitInputDir = urlsplit(filename_to_read, allow_fragments=False)
    if inConn is None:
        inConn = get_objectstore_conn()
    inbucket = inConn.get_bucket(splitInputDir.netloc)
    kr = inbucket.get_key(splitInputDir.path)
    assert kr is not None, 'Unable to read file. File may be absent'
    with smart_open.smart_open(kr, 'r') as fin:
        data = pn.read_csv(fin, header=0, error_bad_lines=False, dtype='str').fillna('NA')
    return data
def _parse_uri(uri_as_string):
    """
    Parse the given URI from a string.

    Supported URI schemes are "file", "s3", "s3n", "s3u", "hdfs", "webhdfs",
    "http" and "https".

      * s3 and s3n are treated the same way.
      * s3u is s3 but without SSL.

    Valid URI examples::

      * s3://my_bucket/my_key
      * s3://my_key:my_secret@my_bucket/my_key
      * s3://my_key:my_secret@my_server:my_port@my_bucket/my_key
      * hdfs:///path/file
      * hdfs://path/file
      * webhdfs://host:port/path/file
      * ./local/path/file
      * ~/local/path/file
      * local/path/file
      * ./local/path/file.gz
      * file:///home/user/file
      * file:///home/user/file.bz2
    """
    if os.name == 'nt':
        # urlsplit doesn't work on Windows -- it parses the drive as the scheme...
        if '://' not in uri_as_string:
            # no protocol given => assume a local file
            uri_as_string = 'file://' + uri_as_string
    parsed_uri = urlsplit(uri_as_string, allow_fragments=False)

    if parsed_uri.scheme == "hdfs":
        return _parse_uri_hdfs(parsed_uri)
    elif parsed_uri.scheme == "webhdfs":
        return _parse_uri_webhdfs(parsed_uri)
    elif parsed_uri.scheme in ("s3", "s3n", "s3u"):
        return _parse_uri_s3x(parsed_uri)
    elif parsed_uri.scheme in ('file', '', None):
        return _parse_uri_file(parsed_uri)
    elif parsed_uri.scheme.startswith('http'):
        return Uri(scheme=parsed_uri.scheme, uri_path=uri_as_string)
    else:
        raise NotImplementedError(
            "unknown URI scheme %r in %r" % (parsed_uri.scheme, uri_as_string)
        )
def HttpOpenRead(parsed_uri, mode='r', **kwargs):
    if parsed_uri.scheme not in ('http', 'https'):
        raise TypeError("can only process http/https urls")
    if mode not in ('r', 'rb'):
        raise NotImplementedError('Streaming write to http not supported')

    url = parsed_uri.uri_path
    response = HttpReadStream(url, **kwargs)

    fname = urlsplit(url, allow_fragments=False).path.split('/')[-1]

    if fname.endswith('.gz'):
        # Gzip needs a seek-able filehandle, so we need to buffer it.
        buffer = make_closing(io.BytesIO)(response.binary_content())
        return compression_wrapper(buffer, fname, mode)
    else:
        return compression_wrapper(response, fname, mode)
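
# Hedged usage sketch: HttpReadStream, make_closing and compression_wrapper
# are defined elsewhere in this module, and the URL is illustrative.  A
# trailing .gz makes the function buffer the whole body so the gzip wrapper
# can seek; anything else streams straight through:
#
#     uri = Uri(scheme='https', uri_path='https://example.com/data.csv.gz')
#     fin = HttpOpenRead(uri, mode='rb')
#     payload = fin.read()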
def write_avro_context_manager(data, filename_to_write, inConn=None, num_lines=100):
    """Write a DataFrame to the object store as an Avro data file.

    Note: `num_lines` is accepted but unused by this implementation.
    """
    avroSchemaOut = gen_schema(data)
    schema = avro.schema.parse(avroSchemaOut)
    dictRes = data.to_dict(orient='records')
    splitInputDir = urlsplit(filename_to_write, allow_fragments=False)
    if inConn is None:
        inConn = get_objectstore_conn()
    inbucket = inConn.get_bucket(splitInputDir.netloc)
    kw = inbucket.get_key(splitInputDir.path, validate=False)
    assert kw is not None, "Unable to get avro key to write"
    with smart_open.smart_open(kw, 'wb') as foutd:
        # DataFileWriter flushes its sink; the no-op here apparently works
        # around a smart_open handle that doesn't support flush().
        foutd.flush = lambda: None
        with avro.datafile.DataFileWriter(foutd, avro.io.DatumWriter(), schema) as writer_contextManager:
            for ll, row in enumerate(dictRes):
                writer_contextManager.append(row)
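
# Hedged round-trip sketch for the two object-store helpers above, assuming a
# reachable store and this module's gen_schema()/get_objectstore_conn();
# bucket and key names are illustrative:
#
#     df = read_sample_csv('s3://my-bucket/in/sample.csv', None)
#     write_avro_context_manager(df, 's3://my-bucket/out/sample.avro')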
def __init__(self, uri, default_scheme="file"):
    """
    Assume `default_scheme` if no scheme given in `uri`.
    """
    if os.name == 'nt':
        # urlsplit doesn't work on Windows -- it parses the drive as the scheme...
        if '://' not in uri:
            # no protocol given => assume a local file
            uri = 'file://' + uri

    parsed_uri = urlsplit(uri, allow_fragments=False)
    self.scheme = parsed_uri.scheme if parsed_uri.scheme else default_scheme

    if self.scheme == "hdfs":
        self.uri_path = parsed_uri.netloc + parsed_uri.path
        self.uri_path = "/" + self.uri_path.lstrip("/")
        if not self.uri_path:
            raise RuntimeError("invalid HDFS URI: %s" % uri)
    elif self.scheme == "webhdfs":
        self.uri_path = parsed_uri.netloc + "/webhdfs/v1" + parsed_uri.path
        if parsed_uri.query:
            self.uri_path += "?" + parsed_uri.query
        if not self.uri_path:
            raise RuntimeError("invalid WebHDFS URI: %s" % uri)
    elif self.scheme in ("s3", "s3n", "s3u"):
        self.bucket_id = (parsed_uri.netloc + parsed_uri.path).split('@')
        self.key_id = None
        self.port = 443
        self.host = boto.config.get('s3', 'host', 's3.amazonaws.com')
        self.ordinary_calling_format = False

        if len(self.bucket_id) == 1:
            # URI without credentials: s3://bucket/object
            self.bucket_id, self.key_id = self.bucket_id[0].split('/', 1)
            # "None" credentials are interpreted as "look for credentials in other locations" by boto
            self.access_id, self.access_secret = None, None
        elif len(self.bucket_id) == 2 and len(self.bucket_id[0].split(':')) == 2:
            # URI in full format: s3://key:secret@bucket/object
            # access key id: [A-Z0-9]{20}
            # secret access key: [A-Za-z0-9/+=]{40}
            acc, self.bucket_id = self.bucket_id
            self.access_id, self.access_secret = acc.split(':')
            self.bucket_id, self.key_id = self.bucket_id.split('/', 1)
        elif len(self.bucket_id) == 3 and len(self.bucket_id[0].split(':')) == 2:
            # or URI in extended format: s3://key:secret@server[:port]@bucket/object
            acc, server, self.bucket_id = self.bucket_id
            self.access_id, self.access_secret = acc.split(':')
            self.bucket_id, self.key_id = self.bucket_id.split('/', 1)
            server = server.split(':')
            self.ordinary_calling_format = True
            self.host = server[0]
            if len(server) == 2:
                self.port = int(server[1])
        else:
            # more than 2 '@' means invalid uri
            # Bucket names must be at least 3 and no more than 63 characters long.
            # Bucket names must be a series of one or more labels.
            # Adjacent labels are separated by a single period (.).
            # Bucket names can contain lowercase letters, numbers, and hyphens.
            # Each label must start and end with a lowercase letter or a number.
            raise RuntimeError("invalid S3 URI: %s" % uri)
    elif self.scheme == 'file':
        self.uri_path = parsed_uri.netloc + parsed_uri.path

        # '~/tmp' may be expanded to '/Users/username/tmp'
        self.uri_path = os.path.expanduser(self.uri_path)

        if not self.uri_path:
            raise RuntimeError("invalid file URI: %s" % uri)
    elif self.scheme.startswith('http'):
        self.uri_path = uri
    elif self.scheme == 'gs':
        self.uri_path = uri
    else:
        raise NotImplementedError("unknown URI scheme %r in %r" % (self.scheme, uri))
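
# Hedged example of the extended S3 form handled above ("ParseUri" is an
# illustrative name for the enclosing class):
#
#     p = ParseUri('s3u://key:secret@myserver:8080@bucket/object')
#     # p.access_id == 'key'; p.access_secret == 'secret'
#     # p.host == 'myserver'; p.port == 8080; p.ordinary_calling_format is True
#     # p.bucket_id == 'bucket'; p.key_id == 'object'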
def test_1_basic(self):
    print('--- running S3Connection tests ---')
    c = S3Connection()
    # create a new, empty bucket
    bucket_name = 'test-%d' % int(time.time())
    bucket = c.create_bucket(bucket_name)
    # now try a get_bucket call and see if it's really there
    bucket = c.get_bucket(bucket_name)
    # test logging
    logging_bucket = c.create_bucket(bucket_name + '-log')
    logging_bucket.set_as_logging_target()
    bucket.enable_logging(target_bucket=logging_bucket, target_prefix=bucket.name)
    bucket.disable_logging()
    c.delete_bucket(logging_bucket)
    k = bucket.new_key('foobar')
    s1 = 'This is a test of file upload and download'
    s2 = 'This is a second string to test file upload and download'
    k.set_contents_from_string(s1)
    fp = open('foobar', 'wb')
    # now get the contents from s3 to a local file
    k.get_contents_to_file(fp)
    fp.close()
    fp = open('foobar')
    # check to make sure content read from s3 is identical to original
    assert s1 == fp.read(), 'corrupted file'
    fp.close()
    # test generated URLs
    url = k.generate_url(3600)
    file = urlopen(url)
    assert s1 == file.read().decode('utf-8'), 'invalid URL %s' % url
    url = k.generate_url(3600, force_http=True)
    file = urlopen(url)
    assert s1 == file.read().decode('utf-8'), 'invalid URL %s' % url
    url = k.generate_url(3600, force_http=True, headers={'x-amz-x-token': 'XYZ'})
    file = urlopen(url)
    assert s1 == file.read().decode('utf-8'), 'invalid URL %s' % url
    rh = {'response-content-disposition': 'attachment; filename="foo.txt"'}
    url = k.generate_url(60, response_headers=rh)
    file = urlopen(url)
    assert s1 == file.read().decode('utf-8'), 'invalid URL %s' % url
    # test whether ampersands and to-be-escaped characters work in header filename
    rh = {'response-content-disposition': 'attachment; filename="foo&z%20ar&ar&zar&bar.txt"'}
    url = k.generate_url(60, response_headers=rh, force_http=True)
    file = urlopen(url)
    assert s1 == file.read().decode('utf-8'), 'invalid URL %s' % url
    # overwrite foobar contents with a PUT
    url = k.generate_url(3600, 'PUT', force_http=True, policy='private', reduced_redundancy=True)
    up = urlsplit(url)
    con = http_client.HTTPConnection(up.hostname, up.port)
    con.request("PUT", up.path + '?' + up.query, body="hello there")
    resp = con.getresponse()
    assert 200 == resp.status
    assert b"hello there" == k.get_contents_as_string()
    bucket.delete_key(k)
    # test a few variations on get_all_keys - first load some data
    # for the first one, let's override the content type
    phony_mimetype = 'application/x-boto-test'
    headers = {'Content-Type': phony_mimetype}
    k.name = 'foo/bar'
    k.set_contents_from_string(s1, headers)
    k.name = 'foo/bas'
    size = k.set_contents_from_filename('foobar')
    assert size == 42
    k.name = 'foo/bat'
    k.set_contents_from_string(s1)
    k.name = 'fie/bar'
    k.set_contents_from_string(s1)
    k.name = 'fie/bas'
    k.set_contents_from_string(s1)
    k.name = 'fie/bat'
    k.set_contents_from_string(s1)
    # try resetting the contents to another value
    md5 = k.md5
    k.set_contents_from_string(s2)
    assert k.md5 != md5
    os.unlink('foobar')
    all = bucket.get_all_keys()
    assert len(all) == 6
    rs = bucket.get_all_keys(prefix='foo')
    assert len(rs) == 3
    rs = bucket.get_all_keys(prefix='', delimiter='/')
    assert len(rs) == 2
    rs = bucket.get_all_keys(maxkeys=5)
    assert len(rs) == 5
    # test the lookup method
    k = bucket.lookup('foo/bar')
    assert isinstance(k, bucket.key_class)
    assert k.content_type == phony_mimetype
    k = bucket.lookup('notthere')
    assert k is None
    # try some metadata stuff
    k = bucket.new_key('has_metadata')
    mdkey1 = 'meta1'
    mdval1 = 'This is the first metadata value'
    k.set_metadata(mdkey1, mdval1)
    mdkey2 = 'meta2'
    mdval2 = 'This is the second metadata value'
    k.set_metadata(mdkey2, mdval2)
    # try a unicode metadata value
    mdval3 = u'föö'
    mdkey3 = 'meta3'
    k.set_metadata(mdkey3, mdval3)
    k.set_contents_from_string(s1)
    k = bucket.lookup('has_metadata')
    assert k.get_metadata(mdkey1) == mdval1
    assert k.get_metadata(mdkey2) == mdval2
    assert k.get_metadata(mdkey3) == mdval3
    k = bucket.new_key('has_metadata')
    k.get_contents_as_string()
    assert k.get_metadata(mdkey1) == mdval1
    assert k.get_metadata(mdkey2) == mdval2
    assert k.get_metadata(mdkey3) == mdval3
    bucket.delete_key(k)
    # test list and iterator
    rs1 = bucket.list()
    num_iter = 0
    for r in rs1:
        num_iter = num_iter + 1
    rs = bucket.get_all_keys()
    num_keys = len(rs)
    assert num_iter == num_keys
    # try a key with a funny character
    k = bucket.new_key('testnewline\n')
    k.set_contents_from_string('This is a test')
    rs = bucket.get_all_keys()
    assert len(rs) == num_keys + 1
    bucket.delete_key(k)
    rs = bucket.get_all_keys()
    assert len(rs) == num_keys
    # try some acl stuff
    bucket.set_acl('public-read')
    policy = bucket.get_acl()
    assert len(policy.acl.grants) == 2
    bucket.set_acl('private')
    policy = bucket.get_acl()
    assert len(policy.acl.grants) == 1
    k = bucket.lookup('foo/bar')
    k.set_acl('public-read')
    policy = k.get_acl()
    assert len(policy.acl.grants) == 2
    k.set_acl('private')
    policy = k.get_acl()
    assert len(policy.acl.grants) == 1
    # try the convenience methods for grants
    bucket.add_user_grant('FULL_CONTROL',
                          'c1e724fbfa0979a4448393c59a8c055011f739b6d102fb37a65f26414653cd67')
    try:
        bucket.add_email_grant('foobar', '*****@*****.**')
    except S3PermissionsError:
        pass
    # now try to create an RRS key
    k = bucket.new_key('reduced_redundancy')
    k.set_contents_from_string('This key has reduced redundancy', reduced_redundancy=True)
    # now try to inject a response header
    data = k.get_contents_as_string(response_headers={'response-content-type': 'foo/bar'})
    assert k.content_type == 'foo/bar'
    # now delete all keys in bucket
    for k in bucket:
        if k.name == 'reduced_redundancy':
            assert k.storage_class == 'REDUCED_REDUNDANCY'
        bucket.delete_key(k)
    # now delete bucket
    time.sleep(5)
    c.delete_bucket(bucket)
    print('--- tests completed ---')