def test_export_collection_missing_warc(self, mock_api_client_cls):
    """Check that a request naming both a collection and a seed is rejected.

    The export message carries a ``collection`` entry and a ``seeds`` entry
    at the same time. After ``on_message()`` the result must be flagged as
    unsuccessful and its first error must carry ``CODE_BAD_REQUEST``.

    NOTE(review): the method name mentions a missing WARC, but no WARC
    listing is configured on the API client mock here; what is asserted is
    the bad-request error. Confirm whether the name is intentional.
    """
    api_url = "http://test"
    item_id = "005b131f5f854402afa2b08a4b7ba960"

    # The patched class hands out this spec'd client exactly once.
    api_client = MagicMock(spec=ApiClient)
    mock_api_client_cls.side_effect = [api_client]

    request = {
        "id": "test3",
        "type": "test_user",
        "collection": {"id": item_id},
        "seeds": [{"id": item_id, "uid": "uid1"}],
        "format": "csv",
        "segment_size": None,
        "path": self.export_path,
    }

    exporter = BaseExporter(
        api_url,
        None,
        None,
        self.working_path,
        host="testhost",
        warc_base_path=self.warc_base_path,
    )
    exporter.routing_key = "export.start.test.test_user"
    exporter.message = request
    exporter.on_message()

    mock_api_client_cls.assert_called_once_with(api_url)

    outcome = exporter.result
    self.assertFalse(outcome.success)
    self.assertEqual(CODE_BAD_REQUEST, outcome.errors[0].code)
def test_export_collection_missing_warc(self, mock_api_client_cls):
    """Verify that combining ``collection`` and ``seeds`` fails the export.

    The message handed to the exporter contains both a collection id and a
    seed list. The test expects ``exporter.result.success`` to be false and
    the first recorded error to have code ``CODE_BAD_REQUEST``.

    NOTE(review): nothing in this test sets up WARC records, so the
    "missing_warc" part of the name is not exercised here; confirm naming.
    """
    shared_id = "005b131f5f854402afa2b08a4b7ba960"

    client_double = MagicMock(spec=ApiClient)
    # A one-element side_effect list means a second instantiation would raise.
    mock_api_client_cls.side_effect = [client_double]

    export_request = {
        "id": "test3",
        "type": "test_user",
        "collection": {"id": shared_id},
        "seeds": [{"id": shared_id, "uid": "uid1"}],
        "format": "csv",
        "segment_size": None,
        "path": self.export_path,
    }

    exporter = BaseExporter(
        "http://test",
        None,
        None,
        self.working_path,
        warc_base_path=self.warc_base_path,
        host="testhost",
    )
    exporter.routing_key = "export.start.test.test_user"
    exporter.message = export_request
    exporter.on_message()

    mock_api_client_cls.assert_called_once_with("http://test")
    self.assertFalse(exporter.result.success)

    first_error = exporter.result.errors[0]
    self.assertEqual(CODE_BAD_REQUEST, first_error.code)
def test_export_seeds(self, mock_api_client_cls):
    """Export two seeds to CSV and check the calls made and the file written.

    Asserts that:
      * the patched API client class is instantiated once with the API URL;
      * ``warcs`` is queried once with both seed ids, no collection id, no
        harvest date bounds and ``exclude_web=True``;
      * the table class is built once from ``self.warc_filepaths`` with the
        two seed uids;
      * the export succeeds and ``test2_001.csv`` exists with three lines.
    """
    api_url = "http://test"
    first_seed_id = "005b131f5f854402afa2b08a4b7ba960"
    second_seed_id = "105b131f5f854402afa2b08a4b7ba960"

    # Table double: iterating it yields a single chunk of header + two rows.
    chunk = [("key1", "key2"), ("k1v1", "k2v1"), ("k1v2", "k2v2")]
    table = MagicMock(spec=BaseTable)
    table.__iter__ = Mock(return_value=iter([chunk]))
    table_cls = MagicMock()
    table_cls.side_effect = [table]
    warc_iter_cls = MagicMock()

    # API client double returning the fixture WARC records.
    api_client = MagicMock(spec=ApiClient)
    api_client.warcs.side_effect = [self.warcs]
    mock_api_client_cls.side_effect = [api_client]

    request = {
        "id": "test2",
        "type": "test_user",
        "seeds": [
            {"id": first_seed_id, "uid": "uid1"},
            {"id": second_seed_id, "uid": "uid2"},
        ],
        "format": "csv",
        "segment_size": None,
        "path": self.export_path,
    }

    exporter = BaseExporter(
        api_url,
        warc_iter_cls,
        table_cls,
        self.working_path,
        host="testhost",
        warc_base_path=self.warc_base_path,
    )
    exporter.routing_key = "export.start.test.test_user"
    exporter.message = request
    exporter.on_message()

    mock_api_client_cls.assert_called_once_with(api_url)
    api_client.warcs.assert_called_once_with(
        collection_id=None,
        seed_ids=[first_seed_id, second_seed_id],
        harvest_date_start=None,
        harvest_date_end=None,
        exclude_web=True,
    )
    table_cls.assert_called_once_with(
        self.warc_filepaths, False, None, None, ["uid1", "uid2"], None
    )
    self.assertTrue(exporter.result.success)

    exported_csv = os.path.join(self.export_path, "test2_001.csv")
    self.assertTrue(os.path.exists(exported_csv))
    with open(exported_csv, "r") as csv_file:
        line_total = len(csv_file.readlines())
    self.assertEqual(3, line_total)
def test_export_collection(self, mock_producer_cls, mock_api_client_cls):
    """Export a collection to CSV with date filters and dedupe enabled.

    Besides the API and table-construction calls and the written CSV file,
    this test inspects the first two calls recorded on the producer mock:
    a "running" status message followed by a "completed success" one.
    """
    api_url = "http://test"
    collection_id = "005b131f5f854402afa2b08a4b7ba960"
    status_routing_key = "export.status.test.test_user"

    # Item dates are parsed up front because the table class is expected to
    # receive datetime objects, while harvest dates are compared as strings.
    item_start_text = "2007-01-25T12:00:00Z"
    item_end_text = "2008-02-25T12:00:00Z"
    item_start = iso8601.parse_date(item_start_text)
    item_end = iso8601.parse_date(item_end_text)
    harvest_start_text = "2007-03-25T12:00:00Z"
    harvest_end_text = "2008-04-25T12:00:00Z"

    # Table double: iterating it yields a single chunk of header + two rows.
    chunk = [("key1", "key2"), ("k1v1", "k2v1"), ("k1v2", "k2v2")]
    table = MagicMock(spec=BaseTable)
    table.__iter__ = Mock(return_value=iter([chunk]))
    table_cls = MagicMock()
    table_cls.side_effect = [table]
    warc_iter_cls = MagicMock()

    api_client = MagicMock(spec=ApiClient)
    api_client.warcs.side_effect = [self.warcs]
    mock_api_client_cls.side_effect = [api_client]

    # Messaging doubles.
    connection = MagicMock(spec=Connection)
    exchange = MagicMock(spec=Exchange)
    exchange.name = "test exchange"
    producer = MagicMock(spec=Producer)
    mock_producer_cls.return_value = producer

    request = {
        "id": "test1",
        "type": "test_user",
        "collection": {"id": collection_id},
        "format": "csv",
        "segment_size": None,
        "path": self.export_path,
        "dedupe": True,
        "item_date_start": item_start_text,
        "item_date_end": item_end_text,
        "harvest_date_start": harvest_start_text,
        "harvest_date_end": harvest_end_text,
    }

    exporter = BaseExporter(
        api_url,
        warc_iter_cls,
        table_cls,
        self.working_path,
        host="testhost",
        warc_base_path=self.warc_base_path,
    )
    exporter.mq_config = True
    exporter._producer_connection = connection
    exporter.exchange = exchange
    exporter.routing_key = "export.start.test.test_user"
    exporter.message = request
    exporter.on_message()

    mock_api_client_cls.assert_called_once_with(api_url)
    api_client.warcs.assert_called_once_with(
        collection_id=collection_id,
        seed_ids=[],
        harvest_date_start=harvest_start_text,
        harvest_date_end=harvest_end_text,
        exclude_web=True,
    )
    table_cls.assert_called_once_with(
        self.warc_filepaths, True, item_start, item_end, [], None
    )
    self.assertTrue(exporter.result.success)

    exported_csv = os.path.join(self.export_path, "test1_001.csv")
    self.assertTrue(os.path.exists(exported_csv))
    with open(exported_csv, "r") as csv_file:
        line_total = len(csv_file.readlines())
    self.assertEqual(3, line_total)

    # (producer call index, expected status, whether date_ended is checked)
    expected_statuses = (
        (0, "running", False),
        (1, "completed success", True),
    )
    for call_index, expected_status, has_ended in expected_statuses:
        call_name, _, call_kwargs = producer.mock_calls[call_index]
        self.assertEqual("publish", call_name)
        self.assertEqual(status_routing_key, call_kwargs["routing_key"])
        body = call_kwargs["body"]
        self.assertEqual(expected_status, body["status"])
        self.assertTrue(iso8601.parse_date(body["date_started"]))
        if has_ended:
            self.assertTrue(iso8601.parse_date(body["date_ended"]))
        self.assertEqual("test1", body["id"])
        self.assertEqual("Base Exporter", body["service"])
        self.assertEqual("testhost", body["host"])
        self.assertTrue(body["instance"])
def test_export_collection_and_seeds(self, mock_producer_cls, mock_api_client_cls):
    """Check that an export fails when the API lists a single WARC record.

    The mocked API client returns one WARC record for the collection. The
    test expects the export result to be unsuccessful, the second recorded
    producer call to publish a "completed failure" status, and that status
    to report both ``CODE_WARC_MISSING`` and ``CODE_NO_WARCS``.

    NOTE(review): despite the method name, the message contains only a
    collection (no ``seeds`` key). Presumably the listed WARC path does not
    exist under ``self.warc_base_path`` -- confirm against the fixture.
    """
    mock_api_client = MagicMock(spec=ApiClient)
    mock_api_client_cls.side_effect = [mock_api_client]
    warcs = [
        {
            "warc_id": "9dc0b9c3a93a49eb8f713330b43f954c",
            "path": "xtest_1-20151202165907873-00000-306-60892de9dfc6-8001.warc.gz",
            "sha1": "000ffb3371eadb507d77d181ca3f0c5d3c74a2fc",
            "bytes": 460518,
            "date_created": "2016-02-22T14:49:07Z",
        }
    ]
    mock_api_client.warcs.side_effect = [warcs]

    mock_connection = MagicMock(spec=Connection)
    mock_exchange = MagicMock(spec=Exchange)
    mock_exchange.name = "test exchange"
    mock_producer = MagicMock(spec=Producer)
    mock_producer_cls.return_value = mock_producer

    export_message = {
        "id": "test2",
        "type": "test_user",
        "collection": {"id": "005b131f5f854402afa2b08a4b7ba960"},
        "format": "csv",
        "segment_size": None,
        "path": self.export_path,
    }

    exporter = BaseExporter(
        "http://test",
        None,
        None,
        self.working_path,
        warc_base_path=self.warc_base_path,
        host="testhost",
    )
    exporter.mq_config = True
    exporter._producer_connection = mock_connection
    exporter.exchange = mock_exchange
    exporter.routing_key = "export.start.test.test_user"
    exporter.message = export_message
    exporter.on_message()

    mock_api_client_cls.assert_called_once_with("http://test")
    mock_api_client.warcs.assert_called_once_with(
        exclude_web=True,
        collection_id="005b131f5f854402afa2b08a4b7ba960",
        seed_ids=[],
        harvest_date_end=None,
        harvest_date_start=None,
    )
    self.assertFalse(exporter.result.success)

    name, _, kwargs = mock_producer.mock_calls[1]
    self.assertEqual("publish", name)
    self.assertEqual("export.status.test.test_user", kwargs["routing_key"])
    export_status_message = kwargs["body"]
    self.assertEqual("completed failure", export_status_message["status"])
    self.assertTrue(iso8601.parse_date(export_status_message["date_started"]))
    self.assertTrue(iso8601.parse_date(export_status_message["date_ended"]))
    self.assertEqual("test2", export_status_message["id"])

    # assertTrue(code, value) treats the second argument as the failure
    # message and therefore never compares anything. Check explicitly that
    # each expected code is among the reported errors.
    reported_codes = [error["code"] for error in export_status_message["errors"]]
    self.assertIn(CODE_WARC_MISSING, reported_codes)
    self.assertIn(CODE_NO_WARCS, reported_codes)
def test_export_dehydrate(self, mock_producer_cls, mock_api_client_cls):
    """Export a collection in "dehydrate" format and check the id file.

    The table double reports ``"key2"`` as its id field. The test expects
    ``test1_001.txt`` to hold two lines, the first being ``"k2v1"``, and
    the second producer call to publish a "completed success" status.
    """
    api_url = "http://test"
    collection_id = "005b131f5f854402afa2b08a4b7ba960"

    # Table double: iterating it yields a single chunk of header + two rows.
    chunk = [("key1", "key2"), ("k1v1", "k2v1"), ("k1v2", "k2v2")]
    table = MagicMock(spec=BaseTable)
    table.__iter__ = Mock(return_value=iter([chunk]))
    table.id_field.return_value = "key2"
    table_cls = MagicMock()
    table_cls.side_effect = [table]
    warc_iter_cls = MagicMock()

    api_client = MagicMock(spec=ApiClient)
    api_client.warcs.side_effect = [self.warcs]
    mock_api_client_cls.side_effect = [api_client]

    # Messaging doubles.
    connection = MagicMock(spec=Connection)
    exchange = MagicMock(spec=Exchange)
    exchange.name = "test exchange"
    producer = MagicMock(spec=Producer)
    mock_producer_cls.return_value = producer

    request = {
        "id": "test1",
        "type": "test_user",
        "collection": {"id": collection_id},
        "format": "dehydrate",
        "segment_size": None,
        "path": self.export_path,
    }

    exporter = BaseExporter(
        api_url,
        warc_iter_cls,
        table_cls,
        self.working_path,
        host="testhost",
        warc_base_path=self.warc_base_path,
    )
    exporter.mq_config = True
    exporter._producer_connection = connection
    exporter.exchange = exchange
    exporter.routing_key = "export.start.test.test_user"
    exporter.message = request
    exporter.on_message()

    mock_api_client_cls.assert_called_once_with(api_url)
    api_client.warcs.assert_called_once_with(
        collection_id=collection_id,
        seed_ids=[],
        harvest_date_start=None,
        harvest_date_end=None,
        exclude_web=True,
    )
    table_cls.assert_called_once_with(self.warc_filepaths, False, None, None, [], None)
    self.assertTrue(exporter.result.success)

    id_file = os.path.join(self.export_path, "test1_001.txt")
    self.assertTrue(os.path.exists(id_file))
    with open(id_file, "r") as handle:
        id_lines = handle.readlines()
    self.assertEqual(2, len(id_lines))
    self.assertEqual("k2v1\n", id_lines[0])

    call_name, _, call_kwargs = producer.mock_calls[1]
    self.assertEqual("publish", call_name)
    self.assertEqual("export.status.test.test_user", call_kwargs["routing_key"])
    body = call_kwargs["body"]
    self.assertEqual("completed success", body["status"])
    self.assertEqual("test1", body["id"])
def test_export_collection(self, mock_producer, mock_api_client_cls):
    """Export a collection to CSV with date filters and dedupe enabled.

    After checking the API and table-construction calls and the written
    CSV file, the test inspects the calls recorded on ``mock_producer`` at
    indexes 1 and 3: a "running" status followed by "completed success".
    """
    api_url = "http://test"
    collection_id = "005b131f5f854402afa2b08a4b7ba960"
    status_routing_key = "export.status.test.test_user"

    # Item dates are parsed up front because the table class is expected to
    # receive datetime objects, while harvest dates are compared as strings.
    item_start_text = "2007-01-25T12:00:00Z"
    item_end_text = "2008-02-25T12:00:00Z"
    item_start = iso8601.parse_date(item_start_text)
    item_end = iso8601.parse_date(item_end_text)
    harvest_start_text = "2007-03-25T12:00:00Z"
    harvest_end_text = "2008-04-25T12:00:00Z"

    # Table double: iterating it yields a single chunk of header + two rows.
    chunk = [("key1", "key2"), ("k1v1", "k2v1"), ("k1v2", "k2v2")]
    table = MagicMock(spec=BaseTable)
    table.__iter__ = Mock(return_value=iter([chunk]))
    table_cls = MagicMock()
    table_cls.side_effect = [table]
    warc_iter_cls = MagicMock()

    api_client = MagicMock(spec=ApiClient)
    api_client.warcs.side_effect = [self.warcs]
    mock_api_client_cls.side_effect = [api_client]

    connection = MagicMock(spec=Connection)
    exchange = MagicMock(spec=Exchange)
    exchange.name = "test exchange"

    request = {
        "id": "test1",
        "type": "test_user",
        "collection": {"id": collection_id},
        "format": "csv",
        "segment_size": None,
        "path": self.export_path,
        "dedupe": True,
        "item_date_start": item_start_text,
        "item_date_end": item_end_text,
        "harvest_date_start": harvest_start_text,
        "harvest_date_end": harvest_end_text,
    }

    exporter = BaseExporter(
        api_url,
        warc_iter_cls,
        table_cls,
        self.working_path,
        host="testhost",
        warc_base_path=self.warc_base_path,
    )
    exporter.mq_config = True
    exporter._producer_connection = connection
    exporter.exchange = exchange
    exporter.routing_key = "export.start.test.test_user"
    exporter.message = request
    exporter.on_message()

    mock_api_client_cls.assert_called_once_with(api_url)
    api_client.warcs.assert_called_once_with(
        collection_id=collection_id,
        seed_ids=[],
        harvest_date_start=harvest_start_text,
        harvest_date_end=harvest_end_text,
    )
    table_cls.assert_called_once_with(
        self.warc_filepaths, True, item_start, item_end, [], None
    )
    self.assertTrue(exporter.result.success)

    exported_csv = os.path.join(self.export_path, "test1_001.csv")
    self.assertTrue(os.path.exists(exported_csv))
    with open(exported_csv, "r") as csv_file:
        line_total = len(csv_file.readlines())
    self.assertEqual(3, line_total)

    # (mock_producer call index, expected status, whether date_ended is checked)
    expected_statuses = (
        (1, "running", False),
        (3, "completed success", True),
    )
    for call_index, expected_status, has_ended in expected_statuses:
        _, _, call_kwargs = mock_producer.mock_calls[call_index]
        self.assertEqual(status_routing_key, call_kwargs["routing_key"])
        body = call_kwargs["body"]
        self.assertEqual(expected_status, body["status"])
        self.assertTrue(iso8601.parse_date(body["date_started"]))
        if has_ended:
            self.assertTrue(iso8601.parse_date(body["date_ended"]))
        self.assertEqual("test1", body["id"])
        self.assertEqual("Base Exporter", body["service"])
        self.assertEqual("testhost", body["host"])
        self.assertTrue(body["instance"])
def test_export_collection_and_seeds(self, mock_producer, mock_api_client_cls):
    """Check that an export fails when the API lists a single WARC record.

    The mocked API client returns one WARC record for the collection. The
    test expects the export result to be unsuccessful, the call recorded at
    index 3 on ``mock_producer`` to carry a "completed failure" status, and
    that status to report both ``CODE_WARC_MISSING`` and ``CODE_NO_WARCS``.

    NOTE(review): despite the method name, the message contains only a
    collection (no ``seeds`` key). Presumably the listed WARC path does not
    exist under ``self.warc_base_path`` -- confirm against the fixture.
    """
    mock_api_client = MagicMock(spec=ApiClient)
    mock_api_client_cls.side_effect = [mock_api_client]
    warcs = [
        {
            "warc_id": "9dc0b9c3a93a49eb8f713330b43f954c",
            "path": "xtest_1-20151202165907873-00000-306-60892de9dfc6-8001.warc.gz",
            "sha1": "000ffb3371eadb507d77d181ca3f0c5d3c74a2fc",
            "bytes": 460518,
            "date_created": "2016-02-22T14:49:07Z",
        }
    ]
    mock_api_client.warcs.side_effect = [warcs]

    mock_connection = MagicMock(spec=Connection)
    mock_exchange = MagicMock(spec=Exchange)
    mock_exchange.name = "test exchange"

    export_message = {
        "id": "test2",
        "type": "test_user",
        "collection": {"id": "005b131f5f854402afa2b08a4b7ba960"},
        "format": "csv",
        "segment_size": None,
        "path": self.export_path,
    }

    exporter = BaseExporter(
        "http://test",
        None,
        None,
        self.working_path,
        warc_base_path=self.warc_base_path,
        host="testhost",
    )
    exporter.mq_config = True
    exporter._producer_connection = mock_connection
    exporter.exchange = mock_exchange
    exporter.routing_key = "export.start.test.test_user"
    exporter.message = export_message
    exporter.on_message()

    mock_api_client_cls.assert_called_once_with("http://test")
    mock_api_client.warcs.assert_called_once_with(
        collection_id="005b131f5f854402afa2b08a4b7ba960",
        seed_ids=[],
        harvest_date_end=None,
        harvest_date_start=None,
    )
    self.assertFalse(exporter.result.success)

    _, _, kwargs = mock_producer.mock_calls[3]
    self.assertEqual("export.status.test.test_user", kwargs["routing_key"])
    export_status_message = kwargs["body"]
    self.assertEqual("completed failure", export_status_message["status"])
    self.assertTrue(iso8601.parse_date(export_status_message["date_started"]))
    self.assertTrue(iso8601.parse_date(export_status_message["date_ended"]))
    self.assertEqual("test2", export_status_message["id"])

    # assertTrue(code, value) treats the second argument as the failure
    # message and therefore never compares anything. Check explicitly that
    # each expected code is among the reported errors.
    reported_codes = [error["code"] for error in export_status_message["errors"]]
    self.assertIn(CODE_WARC_MISSING, reported_codes)
    self.assertIn(CODE_NO_WARCS, reported_codes)
def test_export_seeds(self, mock_api_client_cls):
    """Export two seeds to CSV and check the calls made and the file written.

    Asserts that:
      * the patched API client class is instantiated once with the API URL;
      * ``warcs`` is queried once with both seed ids, no collection id and
        no harvest date bounds;
      * the table class is built once from ``self.warc_filepaths`` with the
        two seed uids;
      * the export succeeds and ``test2_001.csv`` exists with three lines.
    """
    api_url = "http://test"
    first_seed_id = "005b131f5f854402afa2b08a4b7ba960"
    second_seed_id = "105b131f5f854402afa2b08a4b7ba960"

    # Table double: iterating it yields a single chunk of header + two rows.
    chunk = [("key1", "key2"), ("k1v1", "k2v1"), ("k1v2", "k2v2")]
    table = MagicMock(spec=BaseTable)
    table.__iter__ = Mock(return_value=iter([chunk]))
    table_cls = MagicMock()
    table_cls.side_effect = [table]
    warc_iter_cls = MagicMock()

    # API client double returning the fixture WARC records.
    api_client = MagicMock(spec=ApiClient)
    api_client.warcs.side_effect = [self.warcs]
    mock_api_client_cls.side_effect = [api_client]

    request = {
        "id": "test2",
        "type": "test_user",
        "seeds": [
            {"id": first_seed_id, "uid": "uid1"},
            {"id": second_seed_id, "uid": "uid2"},
        ],
        "format": "csv",
        "segment_size": None,
        "path": self.export_path,
    }

    exporter = BaseExporter(
        api_url,
        warc_iter_cls,
        table_cls,
        self.working_path,
        host="testhost",
        warc_base_path=self.warc_base_path,
    )
    exporter.routing_key = "export.start.test.test_user"
    exporter.message = request
    exporter.on_message()

    mock_api_client_cls.assert_called_once_with(api_url)
    api_client.warcs.assert_called_once_with(
        collection_id=None,
        seed_ids=[first_seed_id, second_seed_id],
        harvest_date_start=None,
        harvest_date_end=None,
    )
    table_cls.assert_called_once_with(
        self.warc_filepaths, False, None, None, ["uid1", "uid2"], None
    )
    self.assertTrue(exporter.result.success)

    exported_csv = os.path.join(self.export_path, "test2_001.csv")
    self.assertTrue(os.path.exists(exported_csv))
    with open(exported_csv, "r") as csv_file:
        line_total = len(csv_file.readlines())
    self.assertEqual(3, line_total)
def test_export_dehydrate(self, mock_producer, mock_api_client_cls):
    """Export a collection in "dehydrate" format and check the id file.

    The table double reports ``"key2"`` as its id field. The test expects
    ``test1_001.txt`` to hold two lines, the first being ``"k2v1"``, and
    the call recorded at index 3 on ``mock_producer`` to carry a
    "completed success" status for export ``test1``.
    """
    api_url = "http://test"
    collection_id = "005b131f5f854402afa2b08a4b7ba960"

    # Table double: iterating it yields a single chunk of header + two rows.
    chunk = [("key1", "key2"), ("k1v1", "k2v1"), ("k1v2", "k2v2")]
    table = MagicMock(spec=BaseTable)
    table.__iter__ = Mock(return_value=iter([chunk]))
    table.id_field.return_value = "key2"
    table_cls = MagicMock()
    table_cls.side_effect = [table]
    warc_iter_cls = MagicMock()

    api_client = MagicMock(spec=ApiClient)
    api_client.warcs.side_effect = [self.warcs]
    mock_api_client_cls.side_effect = [api_client]

    connection = MagicMock(spec=Connection)
    exchange = MagicMock(spec=Exchange)
    exchange.name = "test exchange"

    request = {
        "id": "test1",
        "type": "test_user",
        "collection": {"id": collection_id},
        "format": "dehydrate",
        "segment_size": None,
        "path": self.export_path,
    }

    exporter = BaseExporter(
        api_url,
        warc_iter_cls,
        table_cls,
        self.working_path,
        host="testhost",
        warc_base_path=self.warc_base_path,
    )
    exporter.mq_config = True
    exporter._producer_connection = connection
    exporter.exchange = exchange
    exporter.routing_key = "export.start.test.test_user"
    exporter.message = request
    exporter.on_message()

    mock_api_client_cls.assert_called_once_with(api_url)
    api_client.warcs.assert_called_once_with(
        collection_id=collection_id,
        seed_ids=[],
        harvest_date_start=None,
        harvest_date_end=None,
    )
    table_cls.assert_called_once_with(self.warc_filepaths, False, None, None, [], None)
    self.assertTrue(exporter.result.success)

    id_file = os.path.join(self.export_path, "test1_001.txt")
    self.assertTrue(os.path.exists(id_file))
    with open(id_file, "r") as handle:
        id_lines = handle.readlines()
    self.assertEqual(2, len(id_lines))
    self.assertEqual("k2v1\n", id_lines[0])

    _, _, call_kwargs = mock_producer.mock_calls[3]
    self.assertEqual("export.status.test.test_user", call_kwargs["routing_key"])
    body = call_kwargs["body"]
    self.assertEqual("completed success", body["status"])
    self.assertEqual("test1", body["id"])