Esempio n. 1
0
    def test_load_dict(self):
        """A JSON object resource loads through a callable or a model class."""
        payload = {"key1": "foo", "key2": "bar"}
        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=S3Stream(io.StringIO(json.dumps(payload))),
        )

        # transform callable, content handed over without unpacking
        def add_extra(content):
            return {**content, "extra": True}

        expected = {"key1": "foo", "key2": "bar", "extra": True}
        self.assertDictEqual(resource.load(add_extra, unpack=False), expected)

        # BaseModel subclass, content unpacked into it by load()
        class DummyClass(BaseModel):
            """Model with the same two string keys as the payload."""

            key1: str
            key2: str

        loaded = resource.load(DummyClass)
        self.assertIsInstance(loaded, DummyClass)
        self.assertDictEqual(loaded.dict(), payload)
Esempio n. 2
0
    def test_save_str_stream(self):
        """Saving a text-backed stream must hand bytes to ``upload_fileobj``."""
        data = {"key1": "foo", "key2": "bar"}
        data_str = json.dumps(data)
        s3stream = S3Stream(io.StringIO(data_str))

        # mock s3 client: upload_fileobj is replaced so nothing is uploaded
        s3client = boto3.client("s3")
        s3client.upload_fileobj = MagicMock(
            return_value={"msg": "boto3 response"})

        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=s3stream,
            s3client=s3client,
            Metadata={"tag": "metadata"},
        )
        resource.save()

        # the first of the three positional args is the uploaded file object
        args, _ = s3client.upload_fileobj.call_args
        stream, _, _ = args
        output = stream.read()

        # assertIsInstance reports the offending type when the check fails,
        # unlike assertTrue(isinstance(...)) which only says "False is not true"
        self.assertIsInstance(output, bytes)
        self.assertDictEqual(data, json.loads(output.decode("utf-8")))
Esempio n. 3
0
    def test_basic_ok(self):
        """Check the attributes and readers of a fully specified resource."""
        payload = {"key1": "foo", "key2": "bar"}
        raw = json.dumps(payload)
        source = S3Stream(io.StringIO(raw))
        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=source,
            Metadata={"tag": "metadata"},
        )

        # location of the object
        expected_uri = "protocol://bucketname/prefix/filename.ext"
        self.assertEqual(resource.key, "prefix/filename.ext")
        self.assertEqual(resource.uri, expected_uri)

        # content description and extra keyword arguments
        expected_extra = {"Metadata": {"tag": "metadata"}}
        self.assertEqual(resource.content_type, "application/json")
        self.assertEqual(resource.stream, source)
        self.assertDictEqual(resource.extra_args, expected_extra)

        # raw read first, decoded load second (same order as before)
        self.assertEqual(resource.read(), raw)
        self.assertDictEqual(resource.load(), payload)
Esempio n. 4
0
    def test_save_bin_stream(self):
        """Saving a bytes-backed stream passes that stream to the s3 client."""
        payload = json.dumps({"key1": "foo", "key2": "bar"}).encode("utf-8")
        source = S3Stream(io.BytesIO(payload))

        # s3 client with a stubbed upload_fileobj
        client = boto3.client("s3")
        upload_stub = MagicMock(return_value={"msg": "boto3 response"})
        client.upload_fileobj = upload_stub

        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=source,
            s3client=client,
            Metadata={"tag": "metadata"},
        )
        resource.save()

        # content type and metadata are expected together in ExtraArgs
        expected_extra = {
            "ContentType": "application/json",
            "Metadata": {"tag": "metadata"},
        }
        upload_stub.assert_called_with(
            source,
            "bucketname",
            "prefix/filename.ext",
            ExtraArgs=expected_extra,
        )
        self.assertDictEqual(resource.last_resp, {"msg": "boto3 response"})
Esempio n. 5
0
    def __init__(self,
                 filename: str,
                 content_type: str = "",
                 bucketname: str = "",
                 prefix: str = "",
                 protocol: str = "s3a://",
                 stream: S3Stream[StringOrBytes] = None,
                 s3client: boto3.client = None,
                 stats: dict = None,
                 **kwargs):
        """
        Creates a new instance of S3Resource, which will use
        `boto3.s3.transfer.S3Transfer` under the hood to download/upload the s3
        resource.

        See
        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.S3Transfer

        Args:
            filename (str): filename of the object. A random hex name is used
                when it is empty. Any directory part ("a/b/name.ext") is moved
                to the end of `prefix`.
            content_type (str, optional): mime type of the object. When given
                it overrides the content type of `stream`. Defaults to "".
            bucketname (str, optional): name of the bucket the obj is or should be.
                Defaults to "".
            prefix (str, optional): prefix to be added to the filename to get the s3
                object key. Defaults to "".
            protocol (str, optional): s3 client protocol. Defaults to "s3a://".
            stream (S3Stream[StringOrBytes], optional): data stream. A value
                that is not an S3Stream is converted with `S3Stream.from_any`.
                Defaults to None.
            s3client (boto3.client, optional): s3 client to use to retrieve
                resource. Defaults to None.
            stats (dict, optional): stored unchanged on `self.stats`.
                Defaults to None.
            Metadata (dict, optional): metadata for the object. Defaults to None.
            **kwargs: Any additional args to pass to `boto3.s3.transfer.S3Transfer`
                function. Stored (together with `Metadata`) as `extra_args`.
        """
        # S3 keys are always "/" separated, whatever the host OS is, so the
        # key parts are handled with posixpath instead of os.path.
        import posixpath

        # random name if filename is not provided
        filename = filename or uuid4().hex

        # split() returns the real basename; slicing by len(dirname) + 1 cut
        # characters off the name when dirname was "/" or held doubled slashes.
        dirname, basename = posixpath.split(filename)

        if dirname:
            filename = basename
            prefix = posixpath.join(prefix, dirname) + "/"

        if stream:
            if not isinstance(stream, S3Stream):
                stream = S3Stream.from_any(stream, content_type)
            # an explicit content type wins over the one carried by the stream
            if content_type:
                stream.content_type = content_type

        self.filename = filename
        self._content_type = content_type
        self.bucketname = bucketname
        self.prefix = prefix
        self.protocol = protocol
        self._stream: Optional[S3Stream[StringOrBytes]] = stream
        self.extra_args = kwargs
        self.s3client = s3client
        self.last_resp = None
        self.stats = stats
Esempio n. 6
0
 def test_pandas_dict(self):
     """A DataFrame written as JSON records gives an application/json stream."""
     frame = pd.DataFrame(data=[{"name": "a"}, {"name": "b"}])
     stream = S3Stream.from_any(frame, output_as="json", orient="records")

     self.assertEqual(stream.content_type, "application/json")

     # one record per DataFrame row is expected back
     expected_records = [{"name": "a"}, {"name": "b"}]
     decoded = json.loads(stream.read())
     self.assertEqual(decoded, expected_records)
Esempio n. 7
0
    def create_resource(self,
                        filename: str,
                        content_type: str = "",
                        obj: Any = None,
                        protocol: str = "s3a://",
                        metadata: Dict[str, str] = None,
                        pandas_kwargs: dict = None,
                        **kwargs) -> S3Resource:
        """
        Builds a S3Resource bound to the current bucket: it reuses the
        bucket's prefix, name and s3 client.

        Example::

            # create S3Resource in bucket to read in
            foo = prj_bucket.create_resource("foo.json", "application/json")
            # read "s3a://some_bucket/prj-a/foo.json" and load as a dict (or list)
            foo_dict = foo.load()

            # create S3Resource in bucket and save to "s3a://some_bucket/prj-a/foo.json"
            prj_bucket.create_resource("foo.json", obj={"foo": "bar"}).save()


        Args:
            filename (str): name of the resource.
            content_type (str, optional): mime type. Defaults to "", in which
                case the content type of the stream built from `obj` is used
                and, failing that, "application/octet-stream".
            obj (Any, optional): python object to convert into a resource. Defaults
                to None, meaning the resource is created without a stream.
            protocol (str, optional): protocol. Defaults to "s3a://".
            metadata (dict, optional): metadata for the object. Defaults to None,
                which is passed on as an empty dict.
            pandas_kwargs: Any additional args to pass to `S3Stream.from_any`
                when converting `obj`.
            **kwargs: Any additional args to pass to `S3Resource`.

        Returns:
            S3Resource: a S3Resource related to the active S3Bucket.
        """
        stream = None
        if obj is not None:
            conversion_args = pandas_kwargs or {}
            stream = S3Stream.from_any(obj, content_type, **conversion_args)

        # no explicit mime type: take the one carried by the stream
        if not content_type and stream:
            content_type = stream.content_type

        resolved_type = content_type or "application/octet-stream"
        resolved_metadata = metadata or {}

        return S3Resource(filename=filename,
                          prefix=self.prefix,
                          bucketname=self.name,
                          protocol=protocol,
                          content_type=resolved_type,
                          stream=stream,
                          s3client=self._s3client,
                          Metadata=resolved_metadata,
                          **kwargs)
Esempio n. 8
0
    def test_model(self):
        """A BaseModel instance becomes JSON even when text/plain is requested."""
        class Model(BaseModel):
            """Two-field model used as conversion input."""

            name: str
            value: int

        instance = Model(name="foo", value=10)
        stream = S3Stream.from_any(instance, content_type="text/plain")

        self.assertEqual(stream.content_type, "application/json")

        decoded = json.loads(stream.read())
        self.assertDictEqual(decoded, instance.dict())
Esempio n. 9
0
    def test_pickle(self):
        """A plain object sent as octet-stream must go through joblib.dump."""
        from unittest.mock import patch

        class Model:
            """dummy class"""
            def __init__(self, name: str, value: int):
                self.name = name
                self.value = value

        data = Model(name="foo", value=10)

        # Patch joblib.dump only inside this block: assigning a MagicMock to
        # the module attribute would stay in place after the test and affect
        # every later test that relies on joblib.dump.
        with patch.object(joblib, "dump") as dump_mock:
            stream = S3Stream.from_any(
                data, content_type="application/octet-stream")
            self.assertEqual(stream.content_type, "application/octet-stream")
            dump_mock.assert_called_once()
Esempio n. 10
0
    def test_load_list(self):
        """A JSON array resource loads with and without argument unpacking."""
        items = ["a", "b", "c"]
        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            stream=S3Stream(io.StringIO(json.dumps(items))),
        )

        # transform callable, content handed over without unpacking
        def append_d(content):
            return content + ["d"]

        appended = resource.load(append_d, unpack=False)
        self.assertListEqual(appended, ["a", "b", "c", "d"])

        # transform callable, content unpacked into its parameters
        def join_items(a, b, c):
            return ":".join((a, b, c))

        joined = resource.load(join_items, unpack=True)
        self.assertEqual(joined, "a:b:c")
Esempio n. 11
0
    def stream(self) -> S3Stream[StringOrBytes]:
        """
        Data stream for the resource.

        Returns the stream already held by the resource when there is one.
        Otherwise, if a bucket name is set, the object is downloaded into an
        in-memory buffer, wrapped in a S3Stream, kept for later calls and
        returned.

        Raises:
            RuntimeError: when there is neither a stream nor a bucket name.
        """
        if self._stream:
            return self._stream

        if not self.bucketname:
            raise RuntimeError("S3Resource does not have a stream.")

        buffer = io.BytesIO()
        client = self.s3client or boto3.client("s3")
        self.last_resp = client.download_fileobj(self.bucketname,
                                                 self.key,
                                                 buffer,
                                                 ExtraArgs=self.extra_args)
        # rewind so that readers start at the first downloaded byte
        buffer.seek(0)

        self._stream = S3Stream(buffer, self._content_type)
        # an explicitly provided mime type replaces the inferred one
        if self._content_type:
            self._stream.content_type = self._content_type
        return self._stream
Esempio n. 12
0
    def test_full_path(self):
        """A directory part in the filename ends up appended to the prefix."""
        body = json.dumps({"key1": "foo", "key2": "bar"})
        resource = S3Resource(
            "subprefix/filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=S3Stream(io.StringIO(body)),
        )

        # attribute name -> expected value, checked in this order
        expectations = (
            ("filename", "filename.ext"),
            ("prefix", "prefix/subprefix/"),
            ("key", "prefix/subprefix/filename.ext"),
            ("uri", "protocol://bucketname/prefix/subprefix/filename.ext"),
        )
        for attribute, expected in expectations:
            self.assertEqual(getattr(resource, attribute), expected)
Esempio n. 13
0
 def test_pandas_csv(self):
     """A DataFrame written as CSV without index gives an application/csv stream."""
     frame = pd.DataFrame(data=[{"name": "a"}, {"name": "b"}])
     stream = S3Stream.from_any(frame, output_as="csv", index=False)

     self.assertEqual(stream.content_type, "application/csv")

     # header line followed by one line per row
     expected_csv = "name\na\nb\n"
     self.assertEqual(stream.read(), expected_csv)
Esempio n. 14
0
 def test_dict(self):
     """A dict becomes an application/json stream even if text/plain is asked."""
     payload = {"foo": "bar"}
     stream = S3Stream.from_any(payload, content_type="text/plain")

     self.assertEqual(stream.content_type, "application/json")

     decoded = json.loads(stream.read())
     self.assertDictEqual(decoded, payload)
Esempio n. 15
0
 def test_str(self):
     """A plain string keeps the requested content type and its exact text."""
     text = "foo bar"
     stream = S3Stream.from_any(text, content_type="text/plain")

     self.assertEqual(stream.content_type, "text/plain")

     content = stream.read()
     self.assertEqual(content, text)