Beispiel #1
0
    def test_save_str_stream(self):
        """Saving a text stream passes bytes content to ``upload_fileobj``.

        The stream handed to the (mocked) upload call must yield ``bytes``
        that decode as UTF-8 to the JSON document originally supplied.
        """
        expected = {"key1": "foo", "key2": "bar"}
        text_stream = S3Stream(io.StringIO(json.dumps(expected)))

        # Replace the upload call with a mock so its arguments can be
        # inspected after save().
        s3client = boto3.client("s3")
        s3client.upload_fileobj = MagicMock(
            return_value={"msg": "boto3 response"})

        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=text_stream,
            s3client=s3client,
            Metadata={"tag": "metadata"},
        )
        resource.save()

        # The first of the three positional arguments is the uploaded stream.
        positional, _ = s3client.upload_fileobj.call_args
        uploaded, _, _ = positional
        payload = uploaded.read()

        # assertIsInstance reports the offending type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(payload, bytes)
        self.assertDictEqual(expected, json.loads(payload.decode("utf-8")))
Beispiel #2
0
    def test_load_dict(self):
        """``load`` accepts a plain callable or a model class for dict content."""
        payload = {"key1": "foo", "key2": "bar"}
        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=S3Stream(io.StringIO(json.dumps(payload))),
        )

        # pass in a transform func and do not unpack content: the callable
        # is given the parsed dict as its single argument
        def add_extra(content):
            return {**content, "extra": True}

        expected = {"key1": "foo", "key2": "bar", "extra": True}
        self.assertDictEqual(resource.load(add_extra, unpack=False), expected)

        # pass in a basemodel and unpack content
        class DummyClass(BaseModel):
            """dummy class"""

            key1: str
            key2: str

        model = resource.load(DummyClass)
        self.assertIsInstance(model, DummyClass)
        self.assertDictEqual(model.dict(), payload)
Beispiel #3
0
    def test_basic_ok(self):
        """A fully specified resource exposes its location, stream and content."""
        payload = {"key1": "foo", "key2": "bar"}
        raw = json.dumps(payload)
        stream = S3Stream(io.StringIO(raw))
        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=stream,
            Metadata={"tag": "metadata"},
        )

        # Location attributes.
        self.assertEqual(resource.key, "prefix/filename.ext")
        self.assertEqual(
            resource.uri, "protocol://bucketname/prefix/filename.ext")

        # Constructor arguments are reflected on the instance.
        self.assertEqual(resource.content_type, "application/json")
        self.assertEqual(resource.stream, stream)
        expected_extra = {"Metadata": {"tag": "metadata"}}
        self.assertDictEqual(resource.extra_args, expected_extra)

        # Raw text first, then the parsed JSON document.
        self.assertEqual(resource.read(), raw)
        self.assertDictEqual(resource.load(), payload)
Beispiel #4
0
    def test_save_bin_stream(self):
        """Saving a binary stream uploads it with bucket, key and extra args."""
        payload = {"key1": "foo", "key2": "bar"}
        encoded = json.dumps(payload).encode("utf-8")
        binary_stream = S3Stream(io.BytesIO(encoded))

        # mock s3 client
        s3client = boto3.client("s3")
        s3client.upload_fileobj = MagicMock(
            return_value={"msg": "boto3 response"})

        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=binary_stream,
            s3client=s3client,
            Metadata={"tag": "metadata"},
        )
        resource.save()

        # Content type and the extra constructor kwargs travel in ExtraArgs.
        expected_extra = {
            "ContentType": "application/json",
            "Metadata": {"tag": "metadata"},
        }
        s3client.upload_fileobj.assert_called_with(
            binary_stream,
            "bucketname",
            "prefix/filename.ext",
            ExtraArgs=expected_extra,
        )

        # The client's return value is kept on the resource.
        self.assertDictEqual(resource.last_resp, {"msg": "boto3 response"})
Beispiel #5
0
    def create_resource(self,
                        filename: str,
                        content_type: str = "",
                        obj: Any = None,
                        protocol: str = "s3a://",
                        metadata: Dict[str, str] = None,
                        pandas_kwargs: dict = None,
                        **kwargs) -> S3Resource:
        """
        Build an S3Resource bound to this bucket's name, prefix and client.

        Example::

            # create S3Resource in bucket to read in
            foo = prj_bucket.create_resource("foo.json", "application/json")
            # read "s3a://some_bucket/prj-a/foo.json" and load as a dict (or list)
            foo_dict = foo.load()

            # create S3Resource in bucket and save to "s3a://some_bucket/prj-a/foo.json"
            prj_bucket.create_resource("foo.json", obj={"foo": "bar"}).save()


        Args:
            filename (str): name of the resource.
            content_type (str, optional): mime type. When empty, the content
                type of the stream built from ``obj`` is used if there is one;
                otherwise "application/octet-stream".
            obj (Any, optional): python object to convert into the resource's
                stream via ``S3Stream.from_any``. No stream is created when
                None. Defaults to None.
            protocol (str, optional): protocol. Defaults to "s3a://".
            metadata (dict, optional): metadata for the object, passed to
                ``S3Resource`` as ``Metadata``. An empty dict is used when
                omitted. Defaults to None.
            pandas_kwargs: extra keyword arguments forwarded to
                ``S3Stream.from_any`` (intended for `pandas`).
            **kwargs: Any additional args to pass to `S3Resource`.

        Returns:
            S3Resource: a S3Resource related to the active S3Bucket.
        """
        # Only build a stream when there is an object to serialise.
        stream = None
        if obj is not None:
            extra = pandas_kwargs or {}
            stream = S3Stream.from_any(obj, content_type, **extra)

        # Without an explicit content type, take the one carried by the stream.
        if not content_type and stream:
            content_type = stream.content_type

        resolved_type = content_type or "application/octet-stream"
        return S3Resource(
            filename=filename,
            prefix=self.prefix,
            bucketname=self.name,
            protocol=protocol,
            content_type=resolved_type,
            stream=stream,
            s3client=self._s3client,
            Metadata=metadata or {},
            **kwargs,
        )
Beispiel #6
0
    def test_not_s3_stream(self):
        """A plain string given as ``stream`` ends up wrapped in an S3Stream."""
        expected = {"key1": "foo", "key2": "bar"}
        raw = json.dumps(expected)
        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=raw,
        )

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(resource.stream, S3Stream)
        self.assertEqual(resource.content_type, "application/json")
        self.assertEqual(resource.read(), raw)
        self.assertDictEqual(resource.load(), expected)
Beispiel #7
0
    def test_load_list(self):
        """``load`` passes list content whole or unpacked to the callable."""
        items = ["a", "b", "c"]
        resource = S3Resource(
            "filename.ext",
            content_type="application/json",
            stream=S3Stream(io.StringIO(json.dumps(items))),
        )

        # pass in a transform func and do not unpack content
        def append_d(content):
            return content + ["d"]

        whole = resource.load(append_d, unpack=False)
        self.assertListEqual(whole, ["a", "b", "c", "d"])

        # pass in a transform func and unpack content: each list element
        # arrives as its own positional argument
        def colon_join(a, b, c):
            return ":".join((a, b, c))

        unpacked = resource.load(colon_join, unpack=True)
        self.assertEqual(unpacked, "a:b:c")
Beispiel #8
0
    def test_full_path(self):
        """A directory part in the filename is moved into the prefix."""
        payload = {"key1": "foo", "key2": "bar"}
        stream = S3Stream(io.StringIO(json.dumps(payload)))
        resource = S3Resource(
            "subprefix/filename.ext",
            content_type="application/json",
            prefix="prefix/",
            bucketname="bucketname",
            protocol="protocol://",
            stream=stream,
        )

        # The filename keeps only its last segment; the rest extends the prefix.
        self.assertEqual(resource.filename, "filename.ext")
        self.assertEqual(resource.prefix, "prefix/subprefix/")

        # Key and URI still contain the full path.
        expected_key = "prefix/subprefix/filename.ext"
        self.assertEqual(resource.key, expected_key)
        self.assertEqual(
            resource.uri,
            "protocol://bucketname/prefix/subprefix/filename.ext")
Beispiel #9
0
        def to_s3_resource(item):
            """Build an S3Resource from one entry of an s3.list_objects_v2
            response."""
            key = item.get("Key", "")

            # Split at the last "/": everything before it (plus the slash)
            # becomes the prefix, the remainder the filename. A key without
            # any "/" is all filename with an empty prefix.
            head, slash, tail = key.rpartition("/")
            if slash:
                prefix = head + slash
                filename = tail
            else:
                prefix = ""
                filename = key

            return S3Resource(
                filename=filename,
                content_type="application/octet-stream",
                prefix=prefix,
                bucketname=self.name,
                s3client=self._s3client,
                stats=item,
            )