def test_export_full_json(self):
    """Export two mocked items as full JSON and check the single output file.

    The warc iterator class is replaced by a mock; the test verifies how it
    is constructed and iterated, then inspects ``<export path>_001.json``.
    """
    payloads = [
        {"key1": "k1v1", "key2": "k2v1", "key3": "k3v1"},
        {"key1": "k1v2", "key2": "k2v2", "key3": "k3v2"},
    ]
    warc_iter = MagicMock()
    # Hand the exporter copies so ``payloads`` stays pristine for the final comparison.
    warc_iter.iter.return_value = [
        IterItem(None, None, None, None, dict(payload)) for payload in payloads
    ]
    warc_iter_cls = MagicMock()
    # One-element side_effect: the first instantiation yields ``warc_iter``.
    warc_iter_cls.side_effect = [warc_iter]

    target = os.path.join(self.export_path, "test")
    start = datetime_now()
    uids = [11, 14]

    exporter = BaseExporter(
        None,
        warc_iter_cls,
        None,
        self.working_path,
        warc_base_path=self.warc_base_path,
        host="testhost",
    )
    # Last positional argument is None here; the segment test passes 3 instead
    # (presumably a per-file row limit -- confirm against BaseExporter).
    exporter._full_json_export(self.warcs, target, True, start, None, uids, None)

    # The iterator must be built from the warcs and uid limits, then iterated once.
    warc_iter_cls.assert_called_once_with(self.warcs, uids)
    warc_iter.iter.assert_called_once_with(
        dedupe=True,
        item_date_start=start,
        item_date_end=None,
        limit_item_types=None,
    )

    json_path = "{}_001.json".format(target)
    self.assertTrue(os.path.exists(json_path))
    with open(json_path, "r") as handle:
        lines = handle.readlines()
    # One JSON document per line, one line per item.
    self.assertEqual(2, len(lines))
    self.assertDictEqual(payloads[0], json.loads(lines[0]))
def test_export_full_json(self):
    """Verify that a full JSON export of two items produces one two-line file.

    Uses a mocked warc iterator class and checks both the calls made to it
    and the content of the ``_001.json`` file that gets written.
    """
    # Mocked iterator yielding items whose payloads are k<key>v1 and k<key>v2.
    iter_instance = MagicMock()
    iter_instance.iter.return_value = [
        IterItem(None, None, None, None,
                 {"key%d" % key_no: "k%dv%d" % (key_no, item_no) for key_no in (1, 2, 3)})
        for item_no in (1, 2)
    ]
    iter_factory = MagicMock()
    iter_factory.side_effect = [iter_instance]

    now = datetime_now()
    limit_uids = [11, 14]
    export_filepath = os.path.join(self.export_path, "test")

    exporter = BaseExporter(None, iter_factory, None, self.working_path,
                            warc_base_path=self.warc_base_path, host="testhost")
    exporter._full_json_export(self.warcs, export_filepath, True, now, None, limit_uids, None)

    # Exactly one iterator is created and it is iterated exactly once.
    iter_factory.assert_called_once_with(self.warcs, limit_uids)
    iter_instance.iter.assert_called_once_with(dedupe=True,
                                               item_date_start=now,
                                               item_date_end=None,
                                               limit_item_types=None)

    file_path = "".join([export_filepath, "_001.json"])
    self.assertTrue(os.path.exists(file_path))
    with open(file_path, "r") as f:
        lines = f.readlines()
    self.assertEqual(2, len(lines))

    # The first line decodes back to the first item's payload.
    expected_first = {"key1": "k1v1", "key2": "k2v1", "key3": "k3v1"}
    self.assertDictEqual(expected_first, json.loads(lines[0]))
def test_export_full_json_segment(self):
    """Export seven mocked items with a final argument of 3 and check the three output files.

    The test expects ``_001.json`` and ``_002.json`` to hold three rows each
    and ``_003.json`` to hold the remaining one, with the first row of each
    file being item 1, 4 and 7 respectively.
    """
    mock_warc_iter_cls = MagicMock()
    mock_warc_iter = MagicMock()
    # One-element side_effect: the first instantiation returns mock_warc_iter.
    mock_warc_iter_cls.side_effect = [mock_warc_iter]
    mock_warc_iter.iter.return_value = [
        IterItem(None, None, None, None, {"key1": "k1v1", "key2": "k2v1", "key3": "k3v1"}),
        IterItem(None, None, None, None, {"key1": "k1v2", "key2": "k2v2", "key3": "k3v2"}),
        IterItem(None, None, None, None, {"key1": "k1v3", "key2": "k2v3", "key3": "k3v3"}),
        IterItem(None, None, None, None, {"key1": "k1v4", "key2": "k2v4", "key3": "k3v4"}),
        IterItem(None, None, None, None, {"key1": "k1v5", "key2": "k2v5", "key3": "k3v5"}),
        IterItem(None, None, None, None, {"key1": "k1v6", "key2": "k2v6", "key3": "k3v6"}),
        IterItem(None, None, None, None, {"key1": "k1v7", "key2": "k2v7", "key3": "k3v7"}),
    ]
    export_filepath = os.path.join(self.export_path, "test")
    now = datetime_now()
    limit_uids = [11, 14]

    exporter = BaseExporter(
        None, mock_warc_iter_cls, None, self.working_path, warc_base_path=self.warc_base_path, host="testhost"
    )
    # Final positional argument is 3 (presumably the per-file row limit --
    # confirm against BaseExporter._full_json_export).
    exporter._full_json_export(self.warcs, export_filepath, True, now, None, limit_uids, 3)

    mock_warc_iter_cls.assert_called_once_with(self.warcs, limit_uids)
    mock_warc_iter.iter.assert_called_once_with(
        dedupe=True, item_date_start=now, item_date_end=None, limit_item_types=None
    )

    # Expected files: test_001.json, test_002.json, test_003.json.
    # ``range`` (not ``xrange``) so the test also runs on Python 3.
    for idx in range(3):
        file_path = export_filepath + "_" + str(idx + 1).zfill(3) + ".json"
        self.assertTrue(os.path.exists(file_path))
        with open(file_path, "r") as f:
            lines = f.readlines()
        # test_003.json only holds the single leftover row.
        if idx == 2:
            self.assertEqual(1, len(lines))
        else:
            self.assertEqual(3, len(lines))
        # First row of file idx is item number 1 + idx * 3 (1, 4, 7).
        self.assertDictEqual(
            {"key1": "k1v" + str(1 + idx * 3), "key2": "k2v" + str(1 + idx * 3), "key3": "k3v" + str(1 + idx * 3)},
            json.loads(lines[0]),
        )
def test_export_full_json_segment(self):
    """Check the files written when seven items are exported with a final argument of 3.

    Three files are expected (``_001``, ``_002``, ``_003``) holding 3, 3 and
    1 rows; the first row of each is verified against the matching item.
    """
    payloads = [
        {"key1": "k1v1", "key2": "k2v1", "key3": "k3v1"},
        {"key1": "k1v2", "key2": "k2v2", "key3": "k3v2"},
        {"key1": "k1v3", "key2": "k2v3", "key3": "k3v3"},
        {"key1": "k1v4", "key2": "k2v4", "key3": "k3v4"},
        {"key1": "k1v5", "key2": "k2v5", "key3": "k3v5"},
        {"key1": "k1v6", "key2": "k2v6", "key3": "k3v6"},
        {"key1": "k1v7", "key2": "k2v7", "key3": "k3v7"},
    ]
    warc_iter = MagicMock()
    warc_iter.iter.return_value = [IterItem(None, None, None, None, payload) for payload in payloads]
    warc_iter_cls = MagicMock()
    # One-element side_effect: the first instantiation returns ``warc_iter``.
    warc_iter_cls.side_effect = [warc_iter]

    base_path = os.path.join(self.export_path, "test")
    started = datetime_now()
    uids = [11, 14]

    exporter = BaseExporter(None, warc_iter_cls, None, self.working_path,
                            warc_base_path=self.warc_base_path, host="testhost")
    exporter._full_json_export(self.warcs, base_path, True, started, None, uids, 3)

    warc_iter_cls.assert_called_once_with(self.warcs, uids)
    warc_iter.iter.assert_called_once_with(dedupe=True,
                                           item_date_start=started,
                                           item_date_end=None,
                                           limit_item_types=None)

    # Row counts expected in test_001.json, test_002.json and test_003.json;
    # the last file only receives the one leftover row.
    for idx, expected_rows in enumerate((3, 3, 1)):
        segment_path = "{}_{:03d}.json".format(base_path, idx + 1)
        self.assertTrue(os.path.exists(segment_path))
        with open(segment_path, "r") as handle:
            rows = handle.readlines()
        self.assertEqual(expected_rows, len(rows))

        # First row of each file is item 1, 4 and 7 respectively.
        suffix = str(1 + idx * 3)
        expected_first = {
            "key1": "k1v" + suffix,
            "key2": "k2v" + suffix,
            "key3": "k3v" + suffix,
        }
        self.assertDictEqual(expected_first, json.loads(rows[0]))