def test_rw_encoding(self):
    """Should read and write text, respecting encodings, etc."""
    connection = boto.connect_s3()
    connection.create_bucket("bucket")
    parsed = smart_open.ParseUri("s3://bucket/key")
    expected = u"расцветали яблони и груши"

    # Round-trip through an explicit non-UTF-8 encoding.
    with smart_open.s3_open_uri(parsed, "w", encoding="koi8-r") as fout:
        fout.write(expected)
    with smart_open.s3_open_uri(parsed, "r", encoding="koi8-r") as fin:
        self.assertEqual(expected, fin.read())

    # The raw bytes stored on S3 are the koi8-r encoding of the text.
    with smart_open.s3_open_uri(parsed, "rb") as fin:
        self.assertEqual(expected.encode("koi8-r"), fin.read())

    # Decoding with the wrong codec must raise...
    with smart_open.s3_open_uri(parsed, "r", encoding="euc-jp") as fin:
        self.assertRaises(UnicodeDecodeError, fin.read)

    # ...unless the caller opts into lossy error handling.
    with smart_open.s3_open_uri(parsed, "r", encoding="euc-jp", errors="replace") as fin:
        fin.read()
def test_gzip_write_mode(self):
    """Should always open in binary mode when writing through a codec."""
    connection = boto.connect_s3()
    connection.create_bucket("bucket")
    parsed = smart_open.ParseUri("s3://bucket/key.gz")

    # The .gz suffix routes the write through the gzip codec, which must
    # force the underlying S3 stream open in binary ('wb') mode.
    with mock.patch('smart_open.smart_open_s3.open') as mock_open:
        smart_open.s3_open_uri(parsed, "wb")
        mock_open.assert_called_with('bucket', 'key.gz', 'wb')
def test_gzip_write_mode(self):
    """Should always open in binary mode when writing through a codec."""
    # NOTE(review): a same-named test exists elsewhere in this file (boto2
    # variant); if both end up in one class the later definition shadows
    # the earlier -- confirm they live in separate test classes.
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='bucket')
    parsed = smart_open.ParseUri("s3://bucket/key.gz")

    # The .gz suffix routes the write through the gzip codec, which must
    # force the underlying S3 stream open in binary ('wb') mode.
    with mock.patch('smart_open.smart_open_s3.open') as mock_open:
        smart_open.s3_open_uri(parsed, "wb")
        mock_open.assert_called_with('bucket', 'key.gz', 'wb')
def test_gzip_read_mode(self):
    """Should always open in binary mode when reading through a codec."""
    connection = boto.connect_s3()
    connection.create_bucket("bucket")
    parsed = smart_open.ParseUri("s3://bucket/key.gz")
    payload = u"если-б я был султан и имел трёх жён, то тройной красотой был бы окружён"

    with smart_open.s3_open_uri(parsed, "wb") as fout:
        fout.write(payload.encode("utf-8"))

    # Even a text-mode ("r") read of a .gz key must hit S3 in binary mode,
    # because the gzip codec needs the raw compressed bytes.
    with mock.patch('smart_open.smart_open_s3.open') as mock_open:
        smart_open.s3_open_uri(parsed, "r")
        mock_open.assert_called_with('bucket', 'key.gz', 'rb')
def test_rw_gzip(self):
    """Should read/write gzip files, implicitly and explicitly."""
    conn = boto.connect_s3()
    conn.create_bucket("bucket")
    uri = smart_open.ParseUri("s3://bucket/key.gz")

    text = u"не слышны в саду даже шорохи"
    with smart_open.s3_open_uri(uri, "wb") as fout:
        fout.write(text.encode("utf-8"))

    #
    # Check that what we've created is a gzip.
    #
    with smart_open.s3_open_uri(uri, "rb", ignore_extension=True) as fin:
        # Use GzipFile as a context manager so it is closed deterministically;
        # the original left the GzipFile handle open.
        with gzip.GzipFile(fileobj=fin) as gz:
            self.assertEqual(gz.read().decode("utf-8"), text)

    #
    # We should be able to read it back as well.
    #
    with smart_open.s3_open_uri(uri, "rb") as fin:
        self.assertEqual(fin.read().decode("utf-8"), text)
def test_r(self):
    """Reading a UTF string should work."""
    connection = boto.connect_s3()
    connection.create_bucket("bucket")
    bucket = connection.get_bucket("bucket")

    key = boto.s3.key.Key(bucket)
    key.key = "key"
    key.set_contents_from_string(u"физкульт-привет!".encode("utf-8"))

    # Read via an already-constructed boto key object...
    with smart_open.s3_open_key(key, "r") as fin:
        self.assertEqual(fin.read(), u"физкульт-привет!")

    # ...and via a parsed s3:// URI.
    parsed_uri = smart_open.ParseUri("s3://bucket/key")
    with smart_open.s3_open_uri(parsed_uri, "r") as fin:
        self.assertEqual(fin.read(), u"физкульт-привет!")
def test_r(self):
    """Reading a UTF string should work."""
    # NOTE(review): a same-named test exists elsewhere in this file (boto2
    # variant); confirm the two live in separate test classes.
    payload = u"физкульт-привет!"
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='bucket')
    obj = resource.Object('bucket', 'key')
    obj.put(Body=payload.encode('utf-8'))

    # A binary read returns the raw UTF-8 bytes.
    with smart_open.s3_open_key(obj, "rb") as fin:
        self.assertEqual(fin.read(), payload.encode('utf-8'))

    # A text read with an explicit encoding decodes back to unicode.
    with smart_open.s3_open_key(obj, "r", encoding='utf-8') as fin:
        self.assertEqual(fin.read(), payload)

    # Same decode path via a parsed s3:// URI.
    parsed = smart_open.ParseUri("s3://bucket/key")
    with smart_open.s3_open_uri(parsed, "r", encoding='utf-8') as fin:
        self.assertEqual(fin.read(), payload)