def test_command(self, mock_s3_client):
    """The search_archives management command prints matching records and a fetch summary."""
    s3 = MockS3Client()
    mock_s3_client.return_value = s3

    self.create_archive(
        Archive.TYPE_FLOWRUN,
        "D",
        date(2020, 8, 1),
        [
            {"id": 1, "created_on": "2020-07-30T10:00:00Z"},
            {"id": 2, "created_on": "2020-07-30T15:00:00Z"},
        ],
        s3=s3,
    )

    output = StringIO()
    call_command("search_archives", self.org.id, "run", expression="", limit=10, stdout=output)

    printed = output.getvalue()
    self.assertIn('"id": 1', printed)
    self.assertIn("Fetched 2 records in", printed)
def test_iter_records(self):
    """Archive.iter_records yields each stored record in order and then raises StopIteration."""
    archive = Archive.objects.create(
        org=self.org,
        archive_type=Archive.TYPE_FLOWRUN,
        size=10,
        hash=uuid4().hex,
        # fixed: no placeholders in this string, so a plain literal instead of an f-string (ruff F541)
        url="http://s3-bucket.aws.com/my/32562662.jsonl.gz",
        # NOTE(review): record_count=2 disagrees with the 3 records uploaded below —
        # iter_records doesn't appear to read it here, but worth confirming/fixing the fixture
        record_count=2,
        start_date=timezone.now(),
        period="D",
        build_time=23425,
    )

    mock_s3 = MockS3Client()
    mock_s3.put_jsonl("s3-bucket", "my/32562662.jsonl.gz", [{"id": 1}, {"id": 2}, {"id": 3}])

    with patch("temba.archives.models.Archive.s3_client", return_value=mock_s3):
        records_iter = archive.iter_records()

        self.assertEqual(next(records_iter), {"id": 1})
        self.assertEqual(next(records_iter), {"id": 2})
        self.assertEqual(next(records_iter), {"id": 3})
        self.assertRaises(StopIteration, next, records_iter)
def test_get_body(self):
    """get_body resolves an S3 URL to the object's content via the S3 client."""
    s3 = MockS3Client()
    s3.objects[("foo", "test/12345")] = io.StringIO("12345_content")

    with patch("temba.utils.s3.s3.client", return_value=s3):
        self.assertEqual("12345_content", get_body("https://foo.s3.aws.amazon.com/test/12345"))
def test_iter_records(self):
    """Records of a freshly created archive come back one at a time, in order."""
    mock_s3 = MockS3Client()
    archive = self.create_archive(
        Archive.TYPE_MSG, "D", timezone.now().date(), [{"id": 1}, {"id": 2}, {"id": 3}], s3=mock_s3
    )

    with patch("temba.archives.models.Archive.s3_client", return_value=mock_s3):
        it = archive.iter_records()

        for expected_id in (1, 2, 3):
            self.assertEqual({"id": expected_id}, next(it))

        self.assertRaises(StopIteration, next, it)
def test_iter_all_records(self, mock_s3_client):
    """iter_all_records walks all archives of one type in order, honoring after/before bounds."""
    mock_s3 = MockS3Client()
    mock_s3_client.return_value = mock_s3

    daily_jul31 = self.create_archive(
        Archive.TYPE_MSG,
        "D",
        date(2020, 7, 31),
        [
            {"id": 1, "created_on": "2020-07-30T10:00:00Z"},
            {"id": 2, "created_on": "2020-07-30T15:00:00Z"},
        ],
        s3=mock_s3,
    )
    # monthly rollup of the daily archive above - its records must not be yielded twice
    self.create_archive(
        Archive.TYPE_MSG,
        "M",
        date(2020, 7, 1),
        [
            {"id": 1, "created_on": "2020-07-30T10:00:00Z"},
            {"id": 2, "created_on": "2020-07-30T15:00:00Z"},
        ],
        rollup_of=(daily_jul31,),
        s3=mock_s3,
    )
    self.create_archive(
        Archive.TYPE_MSG,
        "D",
        date(2020, 8, 1),
        [
            {"id": 3, "created_on": "2020-08-01T10:00:00Z"},
            {"id": 4, "created_on": "2020-08-01T15:00:00Z"},
        ],
        s3=mock_s3,
    )
    # a flow-run archive for the same day - ignored when iterating msg archives
    self.create_archive(
        Archive.TYPE_FLOWRUN,
        "D",
        date(2020, 8, 1),
        [
            {"id": 3, "created_on": "2020-08-01T10:00:00Z"},
            {"id": 4, "created_on": "2020-08-01T15:00:00Z"},
        ],
        s3=mock_s3,
    )
    self.create_archive(
        Archive.TYPE_MSG,
        "D",
        date(2020, 8, 2),
        [
            {"id": 5, "created_on": "2020-08-02T10:00:00Z"},
            {"id": 6, "created_on": "2020-08-02T15:00:00Z"},
        ],
        s3=mock_s3,
    )

    def assert_ids(record_iter, expected_ids):
        # compare only the id of each yielded record
        self.assertEqual(expected_ids, [record["id"] for record in record_iter])

    assert_ids(Archive.iter_all_records(self.org, Archive.TYPE_MSG), [1, 2, 3, 4, 5, 6])

    assert_ids(
        Archive.iter_all_records(self.org, Archive.TYPE_MSG, after=datetime(2020, 7, 30, 12, 0, 0, 0, pytz.UTC)),
        [2, 3, 4, 5, 6],
    )
    assert_ids(
        Archive.iter_all_records(self.org, Archive.TYPE_MSG, before=datetime(2020, 8, 2, 12, 0, 0, 0, pytz.UTC)),
        [1, 2, 3, 4, 5],
    )
    assert_ids(
        Archive.iter_all_records(
            self.org,
            Archive.TYPE_MSG,
            after=datetime(2020, 7, 30, 12, 0, 0, 0, pytz.UTC),
            before=datetime(2020, 8, 2, 12, 0, 0, 0, pytz.UTC),
        ),
        [2, 3, 4, 5],
    )
def test_rewrite(self, mock_s3_client):
    """rewrite() filters records through a callback, uploads a new object and deletes the old one."""
    mock_s3 = MockS3Client()
    mock_s3_client.return_value = mock_s3

    archive = self.create_archive(
        Archive.TYPE_FLOWRUN,
        "D",
        date(2020, 8, 1),
        [
            {"id": 1, "created_on": "2020-08-01T09:00:00Z", "contact": {"name": "Bob"}},
            {"id": 2, "created_on": "2020-08-01T10:00:00Z", "contact": {"name": "Jim"}},
            {"id": 3, "created_on": "2020-08-01T15:00:00Z", "contact": {"name": "Bob"}},
        ],
        s3=mock_s3,
    )

    bucket, old_key = archive.get_storage_location()
    self.assertEqual({(bucket, old_key)}, set(mock_s3.objects.keys()))
    self.assertEqual(1, len(mock_s3.calls["put_object"]))

    # drop every record whose contact is Jim, replacing the stored object
    archive.rewrite(lambda record: None if record["contact"]["name"] == "Jim" else record, delete_old=True)

    bucket, new_key = archive.get_storage_location()
    self.assertNotEqual(old_key, new_key)

    # only the rewritten object should remain in the bucket
    self.assertEqual({(bucket, new_key)}, set(mock_s3.objects.keys()))

    self.assertEqual(32, len(archive.hash))
    self.assertEqual(
        f"https://s3-bucket.s3.amazonaws.com/{self.org.id}/run_D20200801_{archive.hash}.jsonl.gz", archive.url
    )

    hash_b64 = base64.standard_b64encode(bytes.fromhex(archive.hash)).decode()

    # a second put_object was issued for the rewritten content, keyed by the new hash
    self.assertEqual(2, len(mock_s3.calls["put_object"]))
    kwargs = mock_s3.calls["put_object"][1][2]
    self.assertEqual("s3-bucket", kwargs["Bucket"])
    self.assertEqual(f"{self.org.id}/run_D20200801_{archive.hash}.jsonl.gz", kwargs["Key"])
    self.assertEqual(hash_b64, kwargs["ContentMD5"])

    # delete_old=True removed the original object
    self.assertEqual([call(Bucket="s3-bucket", Key=old_key)], mock_s3.calls["delete_object"])
def test_iter_records(self, mock_s3_client):
    """iter_records supports unfiltered iteration, dict-based filters and raw S3-select clauses."""
    mock_s3 = MockS3Client()
    mock_s3_client.return_value = mock_s3

    archive = self.create_archive(
        Archive.TYPE_MSG, "D", timezone.now().date(), [{"id": 1}, {"id": 2}, {"id": 3}], s3=mock_s3
    )
    bucket, key = archive.get_storage_location()

    def expected_select(expression):
        # the select_object_content call iter_records should issue for the given SQL
        return call(
            Bucket="s3-bucket",
            Key=key,
            Expression=expression,
            ExpressionType="SQL",
            InputSerialization={"CompressionType": "GZIP", "JSON": {"Type": "LINES"}},
            OutputSerialization={"JSON": {"RecordDelimiter": "\n"}},
        )

    # no filtering: a plain get_object fetch, records yielded in order
    it = archive.iter_records()
    for expected_id in (1, 2, 3):
        self.assertEqual({"id": expected_id}, next(it))
    self.assertRaises(StopIteration, next, it)
    self.assertEqual(mock_s3.calls["get_object"][-1], call(Bucket="s3-bucket", Key=key))

    # a where dict is compiled into an S3 select query
    self.assertEqual([{"id": 2}, {"id": 3}], list(archive.iter_records(where={"id__gt": 1})))
    self.assertEqual(
        mock_s3.calls["select_object_content"][-1],
        expected_select("SELECT s.* FROM s3object s WHERE s.id > 1"),
    )

    # a raw where string (used by the search_archives command) is passed through verbatim
    self.assertEqual([{"id": 1}, {"id": 2}], list(archive.iter_records(where={"__raw__": "s.id < 3"})))
    self.assertEqual(
        mock_s3.calls["select_object_content"][-1],
        expected_select("SELECT s.* FROM s3object s WHERE s.id < 3"),
    )