def test_webhdfs_uri(self):
    """Check the scheme and uri_path produced for webhdfs URIs."""
    # Pairs of (input URI, expected uri_path): the first has no query
    # string, the second carries one that must survive in the path.
    cases = (
        (
            "webhdfs://host:port/path/file",
            "host:port/webhdfs/v1/path/file",
        ),
        (
            "webhdfs://host:port/path/file?query_part_1&query_part2",
            "host:port/webhdfs/v1/path/file?query_part_1&query_part2",
        ),
    )
    for uri, expected_path in cases:
        result = smart_open_lib._parse_uri(uri)
        self.assertEqual(result.scheme, "webhdfs")
        self.assertEqual(result.uri_path, expected_path)
def test_scheme(self):
    """Check scheme detection for known, unknown and missing schemes."""
    # Every scheme listed here must be echoed back unchanged.
    supported = ("s3", "s3n", "hdfs", "file", "http", "https")
    for name in supported:
        result = smart_open_lib._parse_uri(name + "://mybucket/mykey")
        self.assertEqual(result.scheme, name)

    # A scheme outside the supported set is expected to be rejected.
    unsupported_uri = "foobar://mybucket/mykey"
    self.assertRaises(
        NotImplementedError, smart_open_lib._parse_uri, unsupported_uri)

    # Input carrying no scheme at all is expected to be reported as "file".
    result = smart_open_lib._parse_uri("blah blah")
    self.assertEqual(result.scheme, "file")
def test_s3_uri_contains_slash(self):
    """Check an S3 URI whose key contains a slash and has no credentials."""
    result = smart_open_lib._parse_uri("s3://mybucket/mydir/mykey")

    # Attribute name -> expected value, checked in this order.
    expected = (
        ("scheme", "s3"),
        ("bucket_id", "mybucket"),
        ("key_id", "mydir/mykey"),
        ("access_id", None),
        ("access_secret", None),
    )
    for attr, value in expected:
        self.assertEqual(getattr(result, attr), value)
def test_s3_uri_with_credentials(self):
    """Check an S3 URI with embedded credentials; the secret holds '/', '_' and '-'."""
    uri = "s3://ACCESSID456:acces/sse_cr-et@mybucket/mykey"
    result = smart_open_lib._parse_uri(uri)

    # Location part of the URI.
    self.assertEqual(result.scheme, "s3")
    self.assertEqual(result.bucket_id, "mybucket")
    self.assertEqual(result.key_id, "mykey")

    # Credential part of the URI.
    self.assertEqual(result.access_id, "ACCESSID456")
    self.assertEqual(result.access_secret, "acces/sse_cr-et")
def test_s3_uri_with_credentials2(self):
    """Check an S3 URI with lowercase credentials and a slash in the secret."""
    result = smart_open_lib._parse_uri(
        "s3://accessid:access/secret@mybucket/mykey")

    # Attribute name -> expected value, checked in this order.
    expected = (
        ("scheme", "s3"),
        ("bucket_id", "mybucket"),
        ("key_id", "mykey"),
        ("access_id", "accessid"),
        ("access_secret", "access/secret"),
    )
    for attr, value in expected:
        self.assertEqual(getattr(result, attr), value)
def test_s3_uri_has_atmark_in_key_name(self):
    """Check that '@' characters inside the key do not disturb credential parsing."""
    uri = "s3://accessid:access/secret@mybucket/my@ke@y"
    result = smart_open_lib._parse_uri(uri)

    self.assertEqual(result.scheme, "s3")
    self.assertEqual(result.bucket_id, "mybucket")
    # The key keeps both of its '@' characters.
    self.assertEqual(result.key_id, "my@ke@y")
    self.assertEqual(result.access_id, "accessid")
    self.assertEqual(result.access_secret, "access/secret")
def test_s3_uri(self):
    """Check the simplest S3 URI: bucket and key only, no credentials."""
    result = smart_open_lib._parse_uri("s3://mybucket/mykey")

    # Attribute name -> expected value, checked in this order.
    expected = (
        ("scheme", "s3"),
        ("bucket_id", "mybucket"),
        ("key_id", "mykey"),
        ("access_id", None),
        ("access_secret", None),
    )
    for attr, value in expected:
        self.assertEqual(getattr(result, attr), value)
def test_gzip_write_mode(self):
    """Writing to a ``.gz`` key on S3 must reach the S3 opener in binary mode.

    The test creates the target bucket, replaces
    ``smart_open.smart_open_s3.open`` with a mock, opens the key for writing
    and then checks that the mock was called with the bucket name, the key
    name and the mode ``'wb'``.
    """
    # NOTE(review): presumably runs against a mocked S3 backend set up
    # outside this method (e.g. by a decorator) -- confirm.
    boto3.resource('s3').create_bucket(Bucket='bucket')

    uri = "s3://bucket/key.gz"
    with mock.patch('smart_open.smart_open_s3.open') as mock_open:
        smart_open.smart_open(uri, "wb")
        mock_open.assert_called_with('bucket', 'key.gz', 'wb')
def test_gzip_write_mode(self):
    """Writing to a ``.gz`` key on S3 must reach the S3 opener in binary mode.

    The test creates the target bucket, replaces ``smart_open.s3.open`` with
    a mock, opens the key for writing and then checks that the mock was
    called with the bucket name, the key name and the mode ``'wb'``.
    """
    # NOTE(review): presumably runs against a mocked S3 backend set up
    # outside this method (e.g. by a decorator) -- confirm.
    boto3.resource('s3').create_bucket(Bucket='bucket')

    uri = "s3://bucket/key.gz"
    with mock.patch('smart_open.s3.open') as mock_open:
        smart_open.smart_open(uri, "wb")
        mock_open.assert_called_with('bucket', 'key.gz', 'wb')
def test_s3_uri_has_atmark_in_key_name2(self):
    """Check an S3 URI with credentials, host, port and '@' inside the key."""
    uri = "s3://accessid:access/secret@hostname:1234@mybucket/dir/my@ke@y"
    result = smart_open_lib._parse_uri(uri)

    # Attribute name -> expected value, checked in this order. The port is
    # compared against the integer 1234, not a string.
    expected = (
        ("scheme", "s3"),
        ("bucket_id", "mybucket"),
        ("key_id", "dir/my@ke@y"),
        ("access_id", "accessid"),
        ("access_secret", "access/secret"),
        ("host", "hostname"),
        ("port", 1234),
    )
    for attr, value in expected:
        self.assertEqual(getattr(result, attr), value)
def test_uri_from_issue_223_works(self):
    """Check a URI with empty credentials (``:@``) before the bucket name."""
    bucket = "omax-mis"
    key = "twilio-messages-media/final/MEcd7c36e75f87dc6dd9e33702cdcd8fb6"
    result = smart_open_lib._parse_uri("s3://:@" + bucket + "/" + key)

    self.assertEqual(result.scheme, "s3")
    self.assertEqual(result.bucket_id, bucket)
    self.assertEqual(result.key_id, key)
    # Both credential fields are expected to be empty strings, not None.
    self.assertEqual(result.access_id, "")
    self.assertEqual(result.access_secret, "")
def test_s3_uri(self):
    """Check parsing of S3 URIs with and without embedded credentials."""
    # Each row: (uri, bucket_id, key_id, access_id, access_secret).
    valid_cases = (
        # no credentials
        ("s3://mybucket/mykey",
         "mybucket", "mykey", None, None),
        # no credentials, key contains a slash
        ("s3://mybucket/mydir/mykey",
         "mybucket", "mydir/mykey", None, None),
        # credentials, secret contains '/', '_' and '-'
        ("s3://ACCESSID456:acces/sse_cr-et@mybucket/mykey",
         "mybucket", "mykey", "ACCESSID456", "acces/sse_cr-et"),
        # credentials, secret contains '/'
        ("s3://accessid:access/secret@mybucket/mykey",
         "mybucket", "mykey", "accessid", "access/secret"),
    )
    for uri, bucket, key, access_id, access_secret in valid_cases:
        result = smart_open_lib._parse_uri(uri)
        self.assertEqual(result.scheme, "s3")
        self.assertEqual(result.bucket_id, bucket)
        self.assertEqual(result.key_id, key)
        self.assertEqual(result.access_id, access_id)
        self.assertEqual(result.access_secret, access_secret)

    # A URI with three '@' separators before the key is expected to be
    # rejected with RuntimeError.
    malformed_uri = "s3://access_id@access_secret@mybucket@port/mykey"
    self.assertRaises(RuntimeError, smart_open_lib._parse_uri, malformed_uri)
def test_s3_uri(self):
    """Check parsing of S3 URIs with and without embedded credentials."""

    def check(uri, bucket, key, access_id, access_secret):
        # Parse one URI and compare every field the test cares about.
        result = smart_open_lib._parse_uri(uri)
        self.assertEqual(result.scheme, "s3")
        self.assertEqual(result.bucket_id, bucket)
        self.assertEqual(result.key_id, key)
        self.assertEqual(result.access_id, access_id)
        self.assertEqual(result.access_secret, access_secret)

    # Plain bucket/key, no credentials.
    check("s3://mybucket/mykey", "mybucket", "mykey", None, None)

    # Key containing a slash, no credentials.
    check("s3://mybucket/mydir/mykey", "mybucket", "mydir/mykey", None, None)

    # Credentials whose secret contains '/', '_' and '-'.
    check(
        "s3://ACCESSID456:acces/sse_cr-et@mybucket/mykey",
        "mybucket", "mykey", "ACCESSID456", "acces/sse_cr-et",
    )

    # Credentials whose secret contains '/'.
    check(
        "s3://accessid:access/secret@mybucket/mykey",
        "mybucket", "mykey", "accessid", "access/secret",
    )

    # A URI with three '@' separators before the key is expected to be
    # rejected with RuntimeError.
    self.assertRaises(
        RuntimeError,
        smart_open_lib._parse_uri,
        "s3://access_id@access_secret@mybucket@port/mykey",
    )
def test_bad_mode(self):
    """Opening with the unsupported mode "x" must raise NotImplementedError."""
    # The already-parsed URI object, not the raw string, is handed to
    # smart_open here.
    parsed = smart_open_lib._parse_uri("s3://bucket/key")
    bad_mode = "x"
    self.assertRaises(
        NotImplementedError, smart_open.smart_open, parsed, bad_mode)
def test_s3_uri(self):
    """Check S3 URI parsing: credentials, '@' in keys, host/port and rejects."""
    # Each row: (uri, bucket_id, key_id, access_id, access_secret).
    valid_cases = (
        # no credentials
        ("s3://mybucket/mykey",
         "mybucket", "mykey", None, None),
        # no credentials, key contains a slash
        ("s3://mybucket/mydir/mykey",
         "mybucket", "mydir/mykey", None, None),
        # credentials, secret contains '/', '_' and '-'
        ("s3://ACCESSID456:acces/sse_cr-et@mybucket/mykey",
         "mybucket", "mykey", "ACCESSID456", "acces/sse_cr-et"),
        # credentials, secret contains '/'
        ("s3://accessid:access/secret@mybucket/mykey",
         "mybucket", "mykey", "accessid", "access/secret"),
        # credentials plus '@' characters inside the object name
        ("s3://accessid:access/secret@mybucket/my@ke@y",
         "mybucket", "my@ke@y", "accessid", "access/secret"),
    )
    for uri, bucket, key, access_id, access_secret in valid_cases:
        result = smart_open_lib._parse_uri(uri)
        self.assertEqual(result.scheme, "s3")
        self.assertEqual(result.bucket_id, bucket)
        self.assertEqual(result.key_id, key)
        self.assertEqual(result.access_id, access_id)
        self.assertEqual(result.access_secret, access_secret)

    # Credentials, host, port and '@' inside the object name; the port is
    # compared against the integer 1234.
    result = smart_open_lib._parse_uri(
        "s3://accessid:access/secret@hostname:1234@mybucket/dir/my@ke@y")
    self.assertEqual(result.scheme, "s3")
    self.assertEqual(result.bucket_id, "mybucket")
    self.assertEqual(result.key_id, "dir/my@ke@y")
    self.assertEqual(result.access_id, "accessid")
    self.assertEqual(result.access_secret, "access/secret")
    self.assertEqual(result.host, "hostname")
    self.assertEqual(result.port, 1234)

    # Both of these are expected to be rejected with RuntimeError: the
    # first has an '@' inside the bucket part, the second uses '@' where a
    # colon should separate the access id from the secret.
    rejected = (
        "s3://access_id:access_secret@my@bucket@port/mykey",
        "s3://access_id@access_secret@mybucket@port/mykey",
    )
    for bad_uri in rejected:
        self.assertRaises(RuntimeError, smart_open_lib._parse_uri, bad_uri)