コード例 #1
0
    def test_webhdfs_uri(self):
        """Check that webhdfs URIs yield the expected scheme and uri_path."""
        # Plain URI without a query string.
        plain = smart_open_lib._parse_uri("webhdfs://host:port/path/file")
        self.assertEqual(plain.scheme, "webhdfs")
        self.assertEqual(plain.uri_path, "host:port/webhdfs/v1/path/file")

        # URI carrying a query string; the test expects it at the end of uri_path.
        queried = smart_open_lib._parse_uri(
            "webhdfs://host:port/path/file?query_part_1&query_part2"
        )
        self.assertEqual(queried.scheme, "webhdfs")
        self.assertEqual(
            queried.uri_path,
            "host:port/webhdfs/v1/path/file?query_part_1&query_part2",
        )
コード例 #2
0
ファイル: test_smart_open.py プロジェクト: sayatul/smart_open
    def test_webhdfs_uri(self):
        """Parse webhdfs URIs, with and without a query string, and check the result."""
        # Each entry pairs an input URI with the uri_path the test expects back.
        cases = (
            # valid uri, no query
            ("webhdfs://host:port/path/file", "host:port/webhdfs/v1/path/file"),
            # valid uri, with query
            (
                "webhdfs://host:port/path/file?query_part_1&query_part2",
                "host:port/webhdfs/v1/path/file?query_part_1&query_part2",
            ),
        )
        for uri, expected_path in cases:
            parsed = smart_open_lib._parse_uri(uri)
            self.assertEqual(parsed.scheme, "webhdfs")
            self.assertEqual(parsed.uri_path, expected_path)
コード例 #3
0
ファイル: test_smart_open.py プロジェクト: sayatul/smart_open
    def test_scheme(self):
        """Verify scheme detection for supported, unsupported and missing schemes."""
        supported = ("s3", "s3n", "hdfs", "file", "http", "https")
        for name in supported:
            uri = "%s://mybucket/mykey" % name
            self.assertEqual(smart_open_lib._parse_uri(uri).scheme, name)

        # A scheme the parser does not know is expected to raise NotImplementedError.
        with self.assertRaises(NotImplementedError):
            smart_open_lib._parse_uri("foobar://mybucket/mykey")

        # A string without any scheme is expected to be reported as "file".
        self.assertEqual(smart_open_lib._parse_uri("blah blah").scheme, "file")
コード例 #4
0
    def test_scheme(self):
        """Check which scheme the parser reports for a range of URIs."""
        parse = smart_open_lib._parse_uri

        # Every supported scheme should come back unchanged.
        for known in ("s3", "s3n", "hdfs", "file", "http", "https"):
            self.assertEqual(parse(known + "://mybucket/mykey").scheme, known)

        # unsupported scheme => NotImplementedError
        with self.assertRaises(NotImplementedError):
            parse("foobar://mybucket/mykey")

        # no scheme at all => the test expects "file"
        fallback = parse("blah blah")
        self.assertEqual(fallback.scheme, "file")
コード例 #5
0
 def test_s3_uri_contains_slash(self):
     """Check parsing of a credential-free S3 URI whose key contains a slash."""
     parsed_uri = smart_open_lib._parse_uri("s3://mybucket/mydir/mykey")
     self.assertEqual(parsed_uri.scheme, "s3")
     self.assertEqual(parsed_uri.bucket_id, "mybucket")
     self.assertEqual(parsed_uri.key_id, "mydir/mykey")
     # No credentials in the URI: check for None by identity, not equality.
     self.assertIsNone(parsed_uri.access_id)
     self.assertIsNone(parsed_uri.access_secret)
コード例 #6
0
 def test_s3_uri_with_credentials(self):
     """Parse an S3 URI with embedded credentials and check every extracted field."""
     uri = "s3://ACCESSID456:acces/sse_cr-et@mybucket/mykey"
     parsed = smart_open_lib._parse_uri(uri)
     self.assertEqual(parsed.scheme, "s3")
     self.assertEqual(parsed.bucket_id, "mybucket")
     self.assertEqual(parsed.key_id, "mykey")
     # The secret in this URI contains a slash, a dash and an underscore.
     self.assertEqual(parsed.access_id, "ACCESSID456")
     self.assertEqual(parsed.access_secret, "acces/sse_cr-et")
コード例 #7
0
 def test_s3_uri_with_credentials2(self):
     """Check the fields parsed from an S3 URI with lowercase credentials."""
     parsed = smart_open_lib._parse_uri("s3://accessid:access/secret@mybucket/mykey")
     # (attribute, expected value) pairs, checked in order.
     expectations = (
         ("scheme", "s3"),
         ("bucket_id", "mybucket"),
         ("key_id", "mykey"),
         ("access_id", "accessid"),
         ("access_secret", "access/secret"),
     )
     for attribute, wanted in expectations:
         self.assertEqual(getattr(parsed, attribute), wanted)
コード例 #8
0
 def test_s3_uri_has_atmark_in_key_name(self):
     """Check that '@' characters inside the key name end up in key_id."""
     uri = "s3://accessid:access/secret@mybucket/my@ke@y"
     result = smart_open_lib._parse_uri(uri)
     self.assertEqual(result.scheme, "s3")
     self.assertEqual(result.bucket_id, "mybucket")
     # The key is expected to keep both of its '@' characters.
     self.assertEqual(result.key_id, "my@ke@y")
     self.assertEqual(result.access_id, "accessid")
     self.assertEqual(result.access_secret, "access/secret")
コード例 #9
0
 def test_s3_uri(self):
     """Do S3 URIs parse correctly?"""
     # correct uri without credentials
     parsed_uri = smart_open_lib._parse_uri("s3://mybucket/mykey")
     self.assertEqual(parsed_uri.scheme, "s3")
     self.assertEqual(parsed_uri.bucket_id, "mybucket")
     self.assertEqual(parsed_uri.key_id, "mykey")
     # Absent credentials are checked by identity with None rather than equality.
     self.assertIsNone(parsed_uri.access_id)
     self.assertIsNone(parsed_uri.access_secret)
コード例 #10
0
    def test_gzip_write_mode(self):
        """Should always open in binary mode when writing through a codec."""
        s3 = boto3.resource('s3')
        s3.create_bucket(Bucket='bucket')

        # Patch the S3 opener so the arguments smart_open passes to it can be inspected.
        with mock.patch('smart_open.smart_open_s3.open') as mock_open:
            smart_open.smart_open("s3://bucket/key.gz", "wb")
            mock_open.assert_called_with('bucket', 'key.gz', 'wb')
コード例 #11
0
    def test_gzip_write_mode(self):
        """Should always open in binary mode when writing through a codec."""
        s3 = boto3.resource('s3')
        s3.create_bucket(Bucket='bucket')

        # Patch the S3 opener so the arguments smart_open passes to it can be inspected.
        with mock.patch('smart_open.s3.open') as mock_open:
            smart_open.smart_open("s3://bucket/key.gz", "wb")
            mock_open.assert_called_with('bucket', 'key.gz', 'wb')
コード例 #12
0
 def test_s3_uri_has_atmark_in_key_name2(self):
     """Check parsing of an S3 URI with credentials, host, port and '@' in the key."""
     uri = "s3://accessid:access/secret@hostname:1234@mybucket/dir/my@ke@y"
     parsed = smart_open_lib._parse_uri(uri)
     # (attribute, expected value) pairs, checked in order; port is compared as an int.
     expectations = (
         ("scheme", "s3"),
         ("bucket_id", "mybucket"),
         ("key_id", "dir/my@ke@y"),
         ("access_id", "accessid"),
         ("access_secret", "access/secret"),
         ("host", "hostname"),
         ("port", 1234),
     )
     for attribute, wanted in expectations:
         self.assertEqual(getattr(parsed, attribute), wanted)
コード例 #13
0
 def test_uri_from_issue_223_works(self):
     """Check parsing of an S3 URI whose access id and secret are both empty."""
     key = "twilio-messages-media/final/MEcd7c36e75f87dc6dd9e33702cdcd8fb6"
     result = smart_open_lib._parse_uri("s3://:@omax-mis/" + key)
     self.assertEqual(result.scheme, "s3")
     self.assertEqual(result.bucket_id, "omax-mis")
     self.assertEqual(result.key_id, key)
     # ":@" supplies empty credentials, which the test expects as empty strings.
     self.assertEqual(result.access_id, "")
     self.assertEqual(result.access_secret, "")
コード例 #14
0
    def test_s3_uri(self):
        """Do S3 URIs parse correctly?

        Covers URIs with and without embedded credentials, a key containing
        a slash, and a malformed URI that is expected to raise RuntimeError.
        """
        # correct uri without credentials
        parsed_uri = smart_open_lib._parse_uri("s3://mybucket/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mykey")
        # absent credentials are checked by identity with None
        self.assertIsNone(parsed_uri.access_id)
        self.assertIsNone(parsed_uri.access_secret)

        # correct uri, key contains slash
        parsed_uri = smart_open_lib._parse_uri("s3://mybucket/mydir/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mydir/mykey")
        self.assertIsNone(parsed_uri.access_id)
        self.assertIsNone(parsed_uri.access_secret)

        # correct uri with credentials
        parsed_uri = smart_open_lib._parse_uri(
            "s3://ACCESSID456:acces/sse_cr-et@mybucket/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mykey")
        self.assertEqual(parsed_uri.access_id, "ACCESSID456")
        self.assertEqual(parsed_uri.access_secret, "acces/sse_cr-et")

        # correct uri, contains credentials
        parsed_uri = smart_open_lib._parse_uri(
            "s3://accessid:access/secret@mybucket/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mykey")
        self.assertEqual(parsed_uri.access_id, "accessid")
        self.assertEqual(parsed_uri.access_secret, "access/secret")

        # incorrect uri - only two '@' in uri are allowed
        self.assertRaises(RuntimeError, smart_open_lib._parse_uri,
                          "s3://access_id@access_secret@mybucket@port/mykey")
コード例 #15
0
    def test_s3_uri(self):
        """Do S3 URIs parse correctly?

        Covers URIs with and without embedded credentials, a key containing
        a slash, and a malformed URI that is expected to raise RuntimeError.
        """
        # correct uri without credentials
        parsed_uri = smart_open_lib._parse_uri("s3://mybucket/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mykey")
        # absent credentials are checked by identity with None
        self.assertIsNone(parsed_uri.access_id)
        self.assertIsNone(parsed_uri.access_secret)

        # correct uri, key contains slash
        parsed_uri = smart_open_lib._parse_uri("s3://mybucket/mydir/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mydir/mykey")
        self.assertIsNone(parsed_uri.access_id)
        self.assertIsNone(parsed_uri.access_secret)

        # correct uri with credentials
        parsed_uri = smart_open_lib._parse_uri("s3://ACCESSID456:acces/sse_cr-et@mybucket/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mykey")
        self.assertEqual(parsed_uri.access_id, "ACCESSID456")
        self.assertEqual(parsed_uri.access_secret, "acces/sse_cr-et")

        # correct uri, contains credentials
        parsed_uri = smart_open_lib._parse_uri("s3://accessid:access/secret@mybucket/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mykey")
        self.assertEqual(parsed_uri.access_id, "accessid")
        self.assertEqual(parsed_uri.access_secret, "access/secret")

        # incorrect uri - only two '@' in uri are allowed
        self.assertRaises(RuntimeError, smart_open_lib._parse_uri, "s3://access_id@access_secret@mybucket@port/mykey")
コード例 #16
0
 def test_bad_mode(self):
     """Bad mode should raise an exception."""
     parsed = smart_open_lib._parse_uri("s3://bucket/key")
     # "x" is the mode the test expects smart_open to reject.
     with self.assertRaises(NotImplementedError):
         smart_open.smart_open(parsed, "x")
コード例 #17
0
 def test_bad_mode(self):
     """Opening with mode "x" is expected to raise NotImplementedError."""
     bad_mode = "x"
     target = smart_open_lib._parse_uri("s3://bucket/key")
     self.assertRaises(NotImplementedError, smart_open.smart_open, target, bad_mode)
コード例 #18
0
    def test_s3_uri(self):
        """Do S3 URIs parse correctly?

        Covers URIs with and without embedded credentials, keys containing
        slashes or '@', an explicit host and port, and two malformed URIs
        that are expected to raise RuntimeError.
        """
        # correct uri without credentials
        parsed_uri = smart_open_lib._parse_uri("s3://mybucket/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mykey")
        # absent credentials are checked by identity with None
        self.assertIsNone(parsed_uri.access_id)
        self.assertIsNone(parsed_uri.access_secret)

        # correct uri, key contains slash
        parsed_uri = smart_open_lib._parse_uri("s3://mybucket/mydir/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mydir/mykey")
        self.assertIsNone(parsed_uri.access_id)
        self.assertIsNone(parsed_uri.access_secret)

        # correct uri with credentials
        parsed_uri = smart_open_lib._parse_uri(
            "s3://ACCESSID456:acces/sse_cr-et@mybucket/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mykey")
        self.assertEqual(parsed_uri.access_id, "ACCESSID456")
        self.assertEqual(parsed_uri.access_secret, "acces/sse_cr-et")

        # correct uri, contains credentials
        parsed_uri = smart_open_lib._parse_uri(
            "s3://accessid:access/secret@mybucket/mykey")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "mykey")
        self.assertEqual(parsed_uri.access_id, "accessid")
        self.assertEqual(parsed_uri.access_secret, "access/secret")

        # correct uri, contains credentials and '@' in object name
        parsed_uri = smart_open_lib._parse_uri(
            "s3://accessid:access/secret@mybucket/my@ke@y")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "my@ke@y")
        self.assertEqual(parsed_uri.access_id, "accessid")
        self.assertEqual(parsed_uri.access_secret, "access/secret")

        # correct uri, contains credentials, host, port and '@' in object name
        parsed_uri = smart_open_lib._parse_uri(
            "s3://accessid:access/secret@hostname:1234@mybucket/dir/my@ke@y")
        self.assertEqual(parsed_uri.scheme, "s3")
        self.assertEqual(parsed_uri.bucket_id, "mybucket")
        self.assertEqual(parsed_uri.key_id, "dir/my@ke@y")
        self.assertEqual(parsed_uri.access_id, "accessid")
        self.assertEqual(parsed_uri.access_secret, "access/secret")
        self.assertEqual(parsed_uri.host, "hostname")
        self.assertEqual(parsed_uri.port, 1234)

        # incorrect uri - bucket can't contain '@'
        self.assertRaises(RuntimeError, smart_open_lib._parse_uri,
                          "s3://access_id:access_secret@my@bucket@port/mykey")

        # incorrect uri - access id and secret are joined by '@' instead of a colon
        self.assertRaises(RuntimeError, smart_open_lib._parse_uri,
                          "s3://access_id@access_secret@mybucket@port/mykey")