def test_export_full_json(self):
        """Run an unsegmented full JSON export of two mocked items and check the ``_001.json`` output."""
        # NOTE(review): a second method with this same name appears later in this file; if both
        # live in the same class, the later definition replaces this one - confirm.

        def payload(n):
            # A fresh dict per call, so the expected value never aliases the object given to the exporter.
            return {"key%d" % k: "k%dv%d" % (k, n) for k in (1, 2, 3)}

        warc_iter = MagicMock()
        warc_iter.iter.return_value = [IterItem(None, None, None, None, payload(n)) for n in (1, 2)]
        # The iterator class may be instantiated exactly once; it then hands back the mock above.
        warc_iter_cls = MagicMock(side_effect=[warc_iter])

        export_base = os.path.join(self.export_path, "test")
        start = datetime_now()
        uids = [11, 14]

        exporter = BaseExporter(
            None,
            warc_iter_cls,
            None,
            self.working_path,
            warc_base_path=self.warc_base_path,
            host="testhost",
        )
        exporter._full_json_export(self.warcs, export_base, True, start, None, uids, None)

        # The exporter must build the iterator from the WARCs and uid limits, then iterate once.
        warc_iter_cls.assert_called_once_with(self.warcs, uids)
        expected_iter_kwargs = dict(dedupe=True, item_date_start=start, item_date_end=None, limit_item_types=None)
        warc_iter.iter.assert_called_once_with(**expected_iter_kwargs)

        # Everything is expected in the first (and only) numbered file, one JSON document per line.
        json_path = "{}{}".format(export_base, "_001.json")
        self.assertTrue(os.path.exists(json_path))
        with open(json_path, "r") as handle:
            rows = handle.readlines()
        self.assertEqual(2, len(rows))
        self.assertDictEqual(payload(1), json.loads(rows[0]))
    def test_export_full_json(self):
        """Export two mocked items as full JSON without segmenting.

        Verifies how the WARC iterator is constructed and iterated, and that
        the output file ending in ``_001.json`` holds two lines, the first of
        which decodes to the first mocked item.
        """
        # NOTE(review): this name is also defined earlier in this file; if
        # both are in the same class, only this later one runs - confirm.

        def build_item(version):
            # Build a new dict on each call so expectations stay independent
            # of the objects passed through the exporter.
            return dict(
                ("key%d" % pos, "k%dv%d" % (pos, version))
                for pos in (1, 2, 3))

        iterator = MagicMock()
        iterator.iter.return_value = [
            IterItem(None, None, None, None, build_item(1)),
            IterItem(None, None, None, None, build_item(2)),
        ]
        iterator_cls = MagicMock(side_effect=[iterator])

        target = os.path.join(self.export_path, "test")
        started = datetime_now()
        uid_filter = [11, 14]

        exporter = BaseExporter(None, iterator_cls, None, self.working_path,
                                warc_base_path=self.warc_base_path,
                                host="testhost")
        exporter._full_json_export(
            self.warcs, target, True, started, None, uid_filter, None)

        iterator_cls.assert_called_once_with(self.warcs, uid_filter)
        iterator.iter.assert_called_once_with(
            dedupe=True,
            item_date_start=started,
            item_date_end=None,
            limit_item_types=None)

        # All rows are expected in the first numbered file.
        output_file = ''.join([target, '_001.json'])
        self.assertTrue(os.path.exists(output_file))
        with open(output_file, "r") as fh:
            written = fh.readlines()
        self.assertEqual(2, len(written))
        self.assertDictEqual(build_item(1), json.loads(written[0]))
    def test_export_full_json_segment(self):
        """Export seven mocked items as full JSON with a final argument of 3 and check the segment files.

        The assertions expect three numbered files: ``test_001.json`` and ``test_002.json`` with
        three rows each, and ``test_003.json`` with the one remaining row. The first row of each
        file must decode to item 1, 4 and 7 respectively.
        """
        mock_warc_iter_cls = MagicMock()
        mock_warc_iter = MagicMock()
        # Only one instantiation of the iterator class is allowed; it returns the mock iterator.
        mock_warc_iter_cls.side_effect = [mock_warc_iter]
        mock_warc_iter.iter.return_value = [
            IterItem(None, None, None, None, {"key1": "k1v1", "key2": "k2v1", "key3": "k3v1"}),
            IterItem(None, None, None, None, {"key1": "k1v2", "key2": "k2v2", "key3": "k3v2"}),
            IterItem(None, None, None, None, {"key1": "k1v3", "key2": "k2v3", "key3": "k3v3"}),
            IterItem(None, None, None, None, {"key1": "k1v4", "key2": "k2v4", "key3": "k3v4"}),
            IterItem(None, None, None, None, {"key1": "k1v5", "key2": "k2v5", "key3": "k3v5"}),
            IterItem(None, None, None, None, {"key1": "k1v6", "key2": "k2v6", "key3": "k3v6"}),
            IterItem(None, None, None, None, {"key1": "k1v7", "key2": "k2v7", "key3": "k3v7"}),
        ]

        export_filepath = os.path.join(self.export_path, "test")
        now = datetime_now()
        limit_uids = [11, 14]

        exporter = BaseExporter(
            None, mock_warc_iter_cls, None, self.working_path, warc_base_path=self.warc_base_path, host="testhost"
        )

        # The final positional argument (3) is presumably the per-file row limit - see the row counts below.
        exporter._full_json_export(self.warcs, export_filepath, True, now, None, limit_uids, 3)

        mock_warc_iter_cls.assert_called_once_with(self.warcs, limit_uids)
        mock_warc_iter.iter.assert_called_once_with(
            dedupe=True, item_date_start=now, item_date_end=None, limit_item_types=None
        )

        # Expected files: test_001.json, test_002.json, test_003.json
        # `range` rather than `xrange`: the latter does not exist on Python 3.
        for idx in range(3):
            file_path = export_filepath + "_" + str(idx + 1).zfill(3) + ".json"
            self.assertTrue(os.path.exists(file_path))
            with open(file_path, "r") as f:
                lines = f.readlines()
            # test_003.json only holds the single leftover row
            if idx == 2:
                self.assertEqual(1, len(lines))
            else:
                self.assertEqual(3, len(lines))
            # The first row of each file is item number 1, 4, 7.
            first_item_no = str(1 + idx * 3)
            self.assertDictEqual(
                {"key1": "k1v" + first_item_no, "key2": "k2v" + first_item_no, "key3": "k3v" + first_item_no},
                json.loads(lines[0]),
            )
    def test_export_full_json_segment(self):
        """Export seven mocked items with a final argument of 3.

        Expects three numbered output files holding 3, 3 and 1 rows, where
        the first row of each file decodes to item 1, 4 and 7 respectively.
        """
        # NOTE(review): this name is also defined earlier in this file; if
        # both are in the same class, only this later one runs - confirm.

        def build_item(version):
            # New dict on each call, so expected values never share an
            # object with what the exporter receives.
            suffix = str(version)
            return {
                "key1": "k1v" + suffix,
                "key2": "k2v" + suffix,
                "key3": "k3v" + suffix
            }

        iterator = MagicMock()
        iterator.iter.return_value = [
            IterItem(None, None, None, None, build_item(version))
            for version in range(1, 8)
        ]
        iterator_cls = MagicMock(side_effect=[iterator])

        target = os.path.join(self.export_path, "test")
        started = datetime_now()
        uid_filter = [11, 14]

        exporter = BaseExporter(None, iterator_cls, None, self.working_path,
                                warc_base_path=self.warc_base_path,
                                host="testhost")
        exporter._full_json_export(
            self.warcs, target, True, started, None, uid_filter, 3)

        iterator_cls.assert_called_once_with(self.warcs, uid_filter)
        iterator.iter.assert_called_once_with(
            dedupe=True,
            item_date_start=started,
            item_date_end=None,
            limit_item_types=None)

        # Segment files are numbered from 001; the last one only holds the
        # single leftover row.
        for segment_no, expected_rows in enumerate((3, 3, 1), 1):
            segment_path = target + '_' + str(segment_no).zfill(3) + '.json'
            self.assertTrue(os.path.exists(segment_path))
            with open(segment_path, "r") as fh:
                written = fh.readlines()
            self.assertEqual(expected_rows, len(written))
            # First row of segment k is item 3k - 2 (1, 4, 7).
            self.assertDictEqual(build_item(3 * segment_no - 2),
                                 json.loads(written[0]))