예제 #1
0
    def test_command(self, mock_s3_client):
        """Run the ``search_archives`` command and check what it prints.

        ``mock_s3_client`` has its return value replaced with a
        ``MockS3Client``; it is presumably injected by a patch decorator that
        is not visible in this excerpt.
        """
        s3 = MockS3Client()
        mock_s3_client.return_value = s3

        # one "D"-period flow-run archive holding two records
        run_records = [
            {"id": 1, "created_on": "2020-07-30T10:00:00Z"},
            {"id": 2, "created_on": "2020-07-30T15:00:00Z"},
        ]
        self.create_archive(
            Archive.TYPE_FLOWRUN, "D", date(2020, 8, 1), run_records, s3=s3
        )

        # capture the command's stdout so it can be inspected below
        buffer = StringIO()
        call_command(
            "search_archives",
            self.org.id,
            "run",
            expression="",
            limit=10,
            stdout=buffer,
        )

        printed = buffer.getvalue()
        self.assertIn('"id": 1', printed)
        self.assertIn("Fetched 2 records in", printed)
예제 #2
0
    def test_iter_records(self):
        """Check that ``Archive.iter_records`` yields the stored records in order.

        An ``Archive`` row is created directly, three records are written to a
        ``MockS3Client`` under the bucket/key matching the archive URL, and
        ``Archive.s3_client`` is patched to return that mock.
        """
        archive = Archive.objects.create(
            org=self.org,
            archive_type=Archive.TYPE_FLOWRUN,
            size=10,
            hash=uuid4().hex,
            url="http://s3-bucket.aws.com/my/32562662.jsonl.gz",
            record_count=2,
            start_date=timezone.now(),
            period="D",
            build_time=23425,
        )

        mock_s3 = MockS3Client()
        mock_s3.put_jsonl(
            "s3-bucket",
            "my/32562662.jsonl.gz",
            [{"id": 1}, {"id": 2}, {"id": 3}],
        )

        with patch("temba.archives.models.Archive.s3_client", return_value=mock_s3):
            records_iter = archive.iter_records()

            # records come back one at a time, in the order they were written
            for expected_id in (1, 2, 3):
                self.assertEqual(next(records_iter), {"id": expected_id})

            # ...and the iterator is exhausted after the third record
            self.assertRaises(StopIteration, next, records_iter)
예제 #3
0
    def test_get_body(self):
        """Check that ``get_body`` returns the content stored for an S3 URL."""
        s3 = MockS3Client()
        # register the object under its (bucket, key) pair
        s3.objects[("foo", "test/12345")] = io.StringIO("12345_content")

        with patch("temba.utils.s3.s3.client", return_value=s3):
            fetched = get_body("https://foo.s3.aws.amazon.com/test/12345")
            self.assertEqual(fetched, "12345_content")
예제 #4
0
    def test_iter_records(self):
        """Check that ``iter_records`` yields an archive's records in order.

        The archive is built with ``self.create_archive`` on a
        ``MockS3Client``, and ``Archive.s3_client`` is patched to return that
        same mock while the records are read back.
        """
        s3 = MockS3Client()
        stored = [{"id": 1}, {"id": 2}, {"id": 3}]
        archive = self.create_archive(
            Archive.TYPE_MSG, "D", timezone.now().date(), stored, s3=s3
        )

        with patch("temba.archives.models.Archive.s3_client", return_value=s3):
            records = archive.iter_records()

            # pull the records one by one so the ordering is verified too
            first = next(records)
            self.assertEqual(first, {"id": 1})
            second = next(records)
            self.assertEqual(second, {"id": 2})
            third = next(records)
            self.assertEqual(third, {"id": 3})

            # nothing left after the third record
            self.assertRaises(StopIteration, next, records)
예제 #5
0
    def test_iter_all_records(self, mock_s3_client):
        """Check ``Archive.iter_all_records`` with and without date bounds.

        Four ``TYPE_MSG`` archives are created (one of them an "M"-period
        archive passed ``rollup_of=(d1,)``) plus one ``TYPE_FLOWRUN`` archive,
        all on the same ``MockS3Client``. The test then asserts which record
        ids are yielded for ``TYPE_MSG`` with no bounds, with ``after``, with
        ``before``, and with both.

        ``mock_s3_client`` has its return value replaced with the mock; it is
        presumably injected by a patch decorator not visible in this excerpt.
        """
        mock_s3 = MockS3Client()
        mock_s3_client.return_value = mock_s3

        d1 = self.create_archive(
            Archive.TYPE_MSG,
            "D",
            date(2020, 7, 31),
            [
                {"id": 1, "created_on": "2020-07-30T10:00:00Z"},
                {"id": 2, "created_on": "2020-07-30T15:00:00Z"},
            ],
            s3=mock_s3,
        )
        # "M"-period archive holding the same records as d1 and declared as its rollup
        self.create_archive(
            Archive.TYPE_MSG,
            "M",
            date(2020, 7, 1),
            [
                {"id": 1, "created_on": "2020-07-30T10:00:00Z"},
                {"id": 2, "created_on": "2020-07-30T15:00:00Z"},
            ],
            rollup_of=(d1,),
            s3=mock_s3,
        )
        self.create_archive(
            Archive.TYPE_MSG,
            "D",
            date(2020, 8, 1),
            [
                {"id": 3, "created_on": "2020-08-01T10:00:00Z"},
                {"id": 4, "created_on": "2020-08-01T15:00:00Z"},
            ],
            s3=mock_s3,
        )
        # same date and ids as the archive above, but a different archive type
        self.create_archive(
            Archive.TYPE_FLOWRUN,
            "D",
            date(2020, 8, 1),
            [
                {"id": 3, "created_on": "2020-08-01T10:00:00Z"},
                {"id": 4, "created_on": "2020-08-01T15:00:00Z"},
            ],
            s3=mock_s3,
        )
        self.create_archive(
            Archive.TYPE_MSG,
            "D",
            date(2020, 8, 2),
            [
                {"id": 5, "created_on": "2020-08-02T10:00:00Z"},
                {"id": 6, "created_on": "2020-08-02T15:00:00Z"},
            ],
            s3=mock_s3,
        )

        def assert_records(record_iter, ids):
            # iterate the records directly; no intermediate list is needed
            self.assertEqual(ids, [r["id"] for r in record_iter])

        # bounds shared by the filtered cases below
        after = datetime(2020, 7, 30, 12, 0, 0, 0, pytz.UTC)
        before = datetime(2020, 8, 2, 12, 0, 0, 0, pytz.UTC)

        assert_records(
            Archive.iter_all_records(self.org, Archive.TYPE_MSG),
            [1, 2, 3, 4, 5, 6],
        )
        assert_records(
            Archive.iter_all_records(self.org, Archive.TYPE_MSG, after=after),
            [2, 3, 4, 5, 6],
        )
        assert_records(
            Archive.iter_all_records(self.org, Archive.TYPE_MSG, before=before),
            [1, 2, 3, 4, 5],
        )
        assert_records(
            Archive.iter_all_records(
                self.org, Archive.TYPE_MSG, after=after, before=before
            ),
            [2, 3, 4, 5],
        )
예제 #6
0
    def test_rewrite(self, mock_s3_client):
        """Check that ``Archive.rewrite`` replaces the stored object.

        A flow-run archive with three records is created on a
        ``MockS3Client``, then rewritten with a transform that returns ``None``
        for records whose contact is named "Jim". The test asserts that the
        storage key changes, that only the new object remains in the mock, and
        that the second ``put_object`` call and the ``delete_object`` call
        carry the expected arguments.

        ``mock_s3_client`` has its return value replaced with the mock; it is
        presumably injected by a patch decorator not visible in this excerpt.
        """
        s3 = MockS3Client()
        mock_s3_client.return_value = s3

        run_records = [
            {
                "id": 1,
                "created_on": "2020-08-01T09:00:00Z",
                "contact": {"name": "Bob"},
            },
            {
                "id": 2,
                "created_on": "2020-08-01T10:00:00Z",
                "contact": {"name": "Jim"},
            },
            {
                "id": 3,
                "created_on": "2020-08-01T15:00:00Z",
                "contact": {"name": "Bob"},
            },
        ]
        archive = self.create_archive(
            Archive.TYPE_FLOWRUN, "D", date(2020, 8, 1), run_records, s3=s3
        )

        # before the rewrite: exactly one object, written by one put_object call
        bucket, key = archive.get_storage_location()
        self.assertEqual({(bucket, key)}, set(s3.objects.keys()))
        self.assertEqual(1, len(s3.calls["put_object"]))

        def purge_jim(record):
            # returns None for Jim's records and the record itself otherwise
            if record["contact"]["name"] != "Jim":
                return record
            return None

        archive.rewrite(purge_jim, delete_old=True)

        # after the rewrite: a different key, and it is the only object left
        bucket, new_key = archive.get_storage_location()
        self.assertNotEqual(key, new_key)
        self.assertEqual({(bucket, new_key)}, set(s3.objects.keys()))

        self.assertEqual(32, len(archive.hash))
        expected_url = f"https://s3-bucket.s3.amazonaws.com/{self.org.id}/run_D20200801_{archive.hash}.jsonl.gz"
        self.assertEqual(expected_url, archive.url)

        # base64 form of the hex hash, compared against ContentMD5 below
        hash_bytes = bytes.fromhex(archive.hash)
        hash_b64 = base64.standard_b64encode(hash_bytes).decode()

        put_calls = s3.calls["put_object"]
        self.assertEqual(2, len(put_calls))

        # index 2 of a recorded call holds its keyword arguments
        put_kwargs = put_calls[1][2]
        self.assertEqual("s3-bucket", put_kwargs["Bucket"])
        expected_key = f"{self.org.id}/run_D20200801_{archive.hash}.jsonl.gz"
        self.assertEqual(expected_key, put_kwargs["Key"])
        self.assertEqual(hash_b64, put_kwargs["ContentMD5"])

        # the old object was deleted under its original key
        expected_deletes = [call(Bucket="s3-bucket", Key=key)]
        self.assertEqual(expected_deletes, s3.calls["delete_object"])
예제 #7
0
    def test_iter_records(self, mock_s3_client):
        """Check ``iter_records`` unfiltered, with a where dict, and with a raw where.

        For each case the test asserts both the records yielded and the last
        call recorded on the ``MockS3Client`` (``get_object`` when unfiltered,
        ``select_object_content`` when a ``where`` is given).

        ``mock_s3_client`` has its return value replaced with the mock; it is
        presumably injected by a patch decorator not visible in this excerpt.
        """
        mock_s3 = MockS3Client()
        mock_s3_client.return_value = mock_s3

        archive = self.create_archive(
            Archive.TYPE_MSG,
            "D",
            timezone.now().date(),
            [{"id": 1}, {"id": 2}, {"id": 3}],
            s3=mock_s3,
        )
        # only the key is needed; the bucket name is asserted as a literal below
        _, key = archive.get_storage_location()

        def expected_select(expression):
            # the select_object_content call expected for the given SQL expression
            return call(
                Bucket="s3-bucket",
                Key=key,
                Expression=expression,
                ExpressionType="SQL",
                InputSerialization={
                    "CompressionType": "GZIP",
                    "JSON": {"Type": "LINES"},
                },
                OutputSerialization={"JSON": {"RecordDelimiter": "\n"}},
            )

        # can fetch records without any filtering
        records_iter = archive.iter_records()

        self.assertEqual(next(records_iter), {"id": 1})
        self.assertEqual(next(records_iter), {"id": 2})
        self.assertEqual(next(records_iter), {"id": 3})
        self.assertRaises(StopIteration, next, records_iter)
        self.assertEqual(
            mock_s3.calls["get_object"][-1],
            call(Bucket="s3-bucket", Key=key),
        )

        # can filter using where dict
        records_iter = archive.iter_records(where={"id__gt": 1})

        self.assertEqual([{"id": 2}, {"id": 3}], list(records_iter))
        self.assertEqual(
            mock_s3.calls["select_object_content"][-1],
            expected_select("SELECT s.* FROM s3object s WHERE s.id > 1"),
        )

        # can also filter using raw where string (used by search_archives command)
        records_iter = archive.iter_records(where={"__raw__": "s.id < 3"})

        self.assertEqual([{"id": 1}, {"id": 2}], list(records_iter))
        self.assertEqual(
            mock_s3.calls["select_object_content"][-1],
            expected_select("SELECT s.* FROM s3object s WHERE s.id < 3"),
        )