def test_save_str_stream(self):
    """A StringIO-backed S3Stream should be uploaded to S3 as utf-8 bytes."""
    payload = {"key1": "foo", "key2": "bar"}
    payload_json = json.dumps(payload)
    text_stream = S3Stream(io.StringIO(payload_json))

    # stub out the real upload so no network call is made
    client = boto3.client("s3")
    client.upload_fileobj = MagicMock(return_value={"msg": "boto3 response"})

    resource = S3Resource(
        "filename.ext",
        content_type="application/json",
        prefix="prefix/",
        bucketname="bucketname",
        protocol="protocol://",
        stream=text_stream,
        s3client=client,
        Metadata={"tag": "metadata"},
    )
    resource.save()

    # first positional arg of upload_fileobj is the (converted) stream
    call_args, _ = client.upload_fileobj.call_args
    uploaded_stream, _, _ = call_args
    body = uploaded_stream.read()
    self.assertIsInstance(body, bytes)
    self.assertDictEqual(payload, json.loads(body.decode("utf-8")))
def test_load_dict(self):
    """load() should support both transform callables and BaseModel classes."""
    payload = {"key1": "foo", "key2": "bar"}
    resource = S3Resource(
        "filename.ext",
        content_type="application/json",
        prefix="prefix/",
        bucketname="bucketname",
        protocol="protocol://",
        stream=S3Stream(io.StringIO(json.dumps(payload))),
    )

    # transform func, no unpacking: callable receives the whole dict
    result = resource.load(lambda content: dict(content, extra=True),
                           unpack=False)
    self.assertDictEqual(result, {
        "key1": "foo",
        "key2": "bar",
        "extra": True
    })

    # a BaseModel subclass: content is fed into the model constructor
    class DummyClass(BaseModel):
        """dummy class"""
        key1: str
        key2: str

    loaded = resource.load(DummyClass)
    self.assertIsInstance(loaded, DummyClass)
    self.assertDictEqual(loaded.dict(), payload)
def test_basic_ok(self):
    """Constructor should derive key/uri and expose stream and extra args."""
    payload = {"key1": "foo", "key2": "bar"}
    payload_json = json.dumps(payload)
    json_stream = S3Stream(io.StringIO(payload_json))

    resource = S3Resource(
        "filename.ext",
        content_type="application/json",
        prefix="prefix/",
        bucketname="bucketname",
        protocol="protocol://",
        stream=json_stream,
        Metadata={"tag": "metadata"},
    )

    # derived identifiers
    self.assertEqual(resource.key, "prefix/filename.ext")
    self.assertEqual(resource.uri,
                     "protocol://bucketname/prefix/filename.ext")
    # stored attributes
    self.assertEqual(resource.content_type, "application/json")
    self.assertEqual(resource.stream, json_stream)
    self.assertDictEqual(resource.extra_args,
                         {"Metadata": {"tag": "metadata"}})
    # content access
    self.assertEqual(resource.read(), payload_json)
    self.assertDictEqual(resource.load(), payload)
def test_save_bin_stream(self):
    """save() should hand a BytesIO-backed stream to boto3 unchanged."""
    payload = {"key1": "foo", "key2": "bar"}
    bin_stream = S3Stream(io.BytesIO(json.dumps(payload).encode("utf-8")))

    # stub the boto3 client so no real upload happens
    client = boto3.client("s3")
    client.upload_fileobj = MagicMock(return_value={"msg": "boto3 response"})

    resource = S3Resource(
        "filename.ext",
        content_type="application/json",
        prefix="prefix/",
        bucketname="bucketname",
        protocol="protocol://",
        stream=bin_stream,
        s3client=client,
        Metadata={"tag": "metadata"},
    )
    resource.save()

    expected_extra_args = {
        "ContentType": "application/json",
        "Metadata": {"tag": "metadata"},
    }
    client.upload_fileobj.assert_called_with(
        bin_stream,
        "bucketname",
        "prefix/filename.ext",
        ExtraArgs=expected_extra_args,
    )
    self.assertDictEqual(resource.last_resp, {"msg": "boto3 response"})
def create_resource(self,
                    filename: str,
                    content_type: str = "",
                    obj: Any = None,
                    protocol: str = "s3a://",
                    metadata: Dict[str, str] = None,
                    pandas_kwargs: dict = None,
                    **kwargs) -> S3Resource:
    """
    Creates a new instance of S3Resource binds to the current bucket.

    Example::

        # create S3Resource in bucket to read in
        foo = prj_bucket.create_resource("foo.json", "application/json")
        # read "s3a://some_bucket/prj-a/foo.json" and load as a dict (or list)
        foo_dict = foo.load()

        # create S3Resource in bucket and save to "s3a://some_bucket/prj-a/foo.json"
        prj_bucket.create_resource("foo.json", obj={"foo": "bar"}).save()

    Args:
        filename (str): name of the resource.
        content_type (str, optional): mime type. Defaults to
            "application/octet-stream".
        obj (Any, optional): python object to convert into a resource.
            Defaults to None.
        protocol (str, optional): protocol. Defaults to "s3a://".
        stream (Union[io.StringIO, io.BytesIO, IO[StringOrBytes]], optional):
            content of the resource. Defaults to None.
        metadata (dict, optional): metadata for the object. Defaults to None.
        pandas_kwargs: Any additional args to pass to `pandas`.
        **kwargs: Any additional args to pass to `S3Resource`.

    Returns:
        S3Resource: a S3Resource related to the active S3Bucket.
    """
    # only build a stream when an object was actually provided
    if obj is None:
        stream = None
    else:
        stream = S3Stream.from_any(obj, content_type, **(pandas_kwargs or {}))

    # fall back to the stream's own content type when none was given
    if not content_type and stream:
        content_type = stream.content_type

    return S3Resource(
        filename=filename,
        prefix=self.prefix,
        bucketname=self.name,
        protocol=protocol,
        content_type=content_type or "application/octet-stream",
        stream=stream,
        s3client=self._s3client,
        Metadata=metadata or {},
        **kwargs,
    )
def test_not_s3_stream(self):
    """A plain string passed as stream should be coerced into an S3Stream."""
    payload = {"key1": "foo", "key2": "bar"}
    payload_json = json.dumps(payload)

    # stream is a raw str here, not an S3Stream instance
    resource = S3Resource(
        "filename.ext",
        content_type="application/json",
        prefix="prefix/",
        bucketname="bucketname",
        protocol="protocol://",
        stream=payload_json,
    )

    self.assertIsInstance(resource.stream, S3Stream)
    self.assertEqual(resource.content_type, "application/json")
    self.assertEqual(resource.read(), payload_json)
    self.assertDictEqual(resource.load(), payload)
def test_load_list(self):
    """load() should handle list content with and without unpacking."""
    items = ["a", "b", "c"]
    resource = S3Resource(
        "filename.ext",
        content_type="application/json",
        stream=S3Stream(io.StringIO(json.dumps(items))),
    )

    # without unpacking the transform receives the whole list
    appended = resource.load(lambda content: content + ["d"], unpack=False)
    self.assertListEqual(appended, ["a", "b", "c", "d"])

    # with unpacking each element is passed as a positional argument
    joined = resource.load(lambda a, b, c: f"{a}:{b}:{c}", unpack=True)
    self.assertEqual(joined, "a:b:c")
def test_full_path(self):
    """A filename containing a path should fold its directories into the prefix."""
    payload_json = json.dumps({"key1": "foo", "key2": "bar"})

    resource = S3Resource(
        "subprefix/filename.ext",
        content_type="application/json",
        prefix="prefix/",
        bucketname="bucketname",
        protocol="protocol://",
        stream=S3Stream(io.StringIO(payload_json)),
    )

    # "subprefix/" moves out of the filename and onto the prefix
    self.assertEqual(resource.filename, "filename.ext")
    self.assertEqual(resource.prefix, "prefix/subprefix/")
    self.assertEqual(resource.key, "prefix/subprefix/filename.ext")
    self.assertEqual(
        resource.uri,
        "protocol://bucketname/prefix/subprefix/filename.ext")
def to_s3_resource(item):
    """Converts the response object from s3.list_objects_v2 into a S3Resource."""
    # NOTE(review): relies on `self` (bucket name / s3 client) from the
    # enclosing scope — this is a closure, not a standalone function.
    key = item.get("Key", "")
    # everything before the last "/" becomes the prefix
    *folders, filename = key.split("/")
    prefix = ("/".join(folders) + "/") if folders else ""
    return S3Resource(
        filename=filename,
        content_type="application/octet-stream",
        prefix=prefix,
        bucketname=self.name,
        s3client=self._s3client,
        stats=item,
    )