def test_locator(self):
    """
    Check that a locator is a string, that it is equal for unit keys holding
    identical items listed in a different order, and that it differs when
    either the unit key values or the type ID differ.
    """
    base_key = {'a': 1, 'b': 2, 'c': 3}
    reordered_key = {'c': 3, 'b': 2, 'a': 1}
    swapped_values_key = {'c': 1, 'b': 2, 'a': 3}

    base = ContentCatalog.get_locator(TYPE_ID, base_key)
    # Same items, different ordering: expected to match ``base``.
    reordered = ContentCatalog.get_locator(TYPE_ID, reordered_key)
    # Same names, different values: expected to differ from ``base``.
    swapped_values = ContentCatalog.get_locator(TYPE_ID, swapped_values_key)
    # Same key, truncated type ID: expected to differ from ``base``.
    other_type = ContentCatalog.get_locator(TYPE_ID[1:], base_key)

    for locator in (base, reordered):
        self.assertTrue(isinstance(locator, str))
    self.assertEqual(base, reordered)
    for different in (swapped_values, other_type):
        self.assertNotEqual(base, different)
def find(self, type_id, unit_key):
    """
    Find entries in the content catalog using the specified unit
    type_id and unit_key.

    The catalog may contain more than one entry matching the locator for a
    given content source.  In this case, only the newest entry for each
    source is included in the result set.

    :param type_id: The unit type ID.
    :type type_id: str
    :param unit_key: The unit key.
    :type unit_key: dict
    :return: A list of matching entries; at most one per source_id.
    :rtype: list
    """
    collection = ContentCatalog.get_collection()
    locator = ContentCatalog.get_locator(type_id, unit_key)
    # Only entries whose expiration is at or after get_expiration(0) match.
    # NOTE(review): get_expiration(0) is presumably "now" - confirm.
    query = {
        'locator': locator,
        'expiration': {'$gte': ContentCatalog.get_expiration(0)},
    }
    # Entries arrive in ascending _id order, so for each source_id a later
    # entry overwrites an earlier one and only the last one is kept.
    newest_by_source = {}
    for entry in collection.find(query, sort=[('_id', ASCENDING)]):
        newest_by_source[entry['source_id']] = entry
    # dict.values() is a view (not a list) on Python 3; materialize it so
    # the documented list return type holds on every interpreter.
    return list(newest_by_source.values())
def test_add(self):
    """
    Add every unit returned by ``self.units(0, 10)`` through the catalog
    manager, then check the collection size and the stored type_id,
    unit_key and url of each entry.
    """
    units = self.units(0, 10)
    manager = ContentCatalogManager()
    for key, location in units:
        manager.add_entry(SOURCE_ID, EXPIRATION, TYPE_ID, key, location)

    catalog = ContentCatalog.get_collection()
    self.assertEqual(len(units), catalog.find().count())

    for key, location in units:
        stored = catalog.find_one(
            {'locator': ContentCatalog.get_locator(TYPE_ID, key)})
        expectations = (
            ('type_id', TYPE_ID),
            ('unit_key', key),
            ('url', location),
        )
        for field, expected in expectations:
            self.assertEqual(stored[field], expected)
def delete_entry(self, source_id, type_id, unit_key):
    """
    Remove from the content catalog the documents that match both the
    given content source and the locator derived from the unit type ID
    and unit key.

    :param source_id: A content source ID.
    :type source_id: str
    :param type_id: The unit type ID.
    :type type_id: str
    :param unit_key: The unit key.
    :type unit_key: dict
    """
    catalog = ContentCatalog.get_collection()
    criteria = {
        'source_id': source_id,
        'locator': ContentCatalog.get_locator(type_id, unit_key),
    }
    catalog.remove(criteria, safe=True)
def test_add(self):
    """
    Add every unit returned by ``self.units(0, 10)`` through a cataloger
    conduit, then check the conduit's attributes and counters, the
    collection size, and the stored fields of each entry.
    """
    units = self.units(0, 10)
    conduit = CatalogerConduit(SOURCE_ID, EXPIRES)
    for key, location in units:
        conduit.add_entry(TYPE_ID, key, location)

    catalog = ContentCatalog.get_collection()
    expected_total = len(units)

    # The conduit keeps what it was constructed with.
    self.assertEqual(conduit.source_id, SOURCE_ID)
    self.assertEqual(conduit.expires, EXPIRES)
    # One stored document and one counted addition per unit; no deletions.
    self.assertEqual(expected_total, catalog.find().count())
    self.assertEqual(conduit.added_count, expected_total)
    self.assertEqual(conduit.deleted_count, 0)

    for key, location in units:
        stored = catalog.find_one(
            {"locator": ContentCatalog.get_locator(TYPE_ID, key)})
        expectations = (
            ("type_id", TYPE_ID),
            ("unit_key", key),
            ("url", location),
        )
        for field, expected in expectations:
            self.assertEqual(stored[field], expected)
def test_add(self):
    """
    Push the units from ``self.units(0, 10)`` into the catalog via a
    cataloger conduit and confirm the conduit state, the number of stored
    documents and the content of every stored entry.
    """
    units = self.units(0, 10)
    conduit = CatalogerConduit(SOURCE_ID, EXPIRES)
    for unit_key, url in units:
        conduit.add_entry(TYPE_ID, unit_key, url)

    db_collection = ContentCatalog.get_collection()
    unit_count = len(units)

    self.assertEqual(conduit.source_id, SOURCE_ID)
    self.assertEqual(conduit.expires, EXPIRES)
    self.assertEqual(unit_count, db_collection.find().count())
    self.assertEqual(conduit.added_count, unit_count)
    self.assertEqual(conduit.deleted_count, 0)

    for unit_key, url in units:
        lookup = {'locator': ContentCatalog.get_locator(TYPE_ID, unit_key)}
        found = db_collection.find_one(lookup)
        # Each stored document must echo what was added for this unit.
        for name, wanted in (('type_id', TYPE_ID),
                             ('unit_key', unit_key),
                             ('url', url)):
            self.assertEqual(found[name], wanted)
def test_delete(self):
    """
    Add the units from ``self.units(0, 10)`` through a cataloger conduit,
    delete the one at index 5, and check that it is gone from the
    collection and that the conduit counters reflect the deletion.
    """
    units = self.units(0, 10)
    conduit = CatalogerConduit(SOURCE_ID, EXPIRES)
    for key, location in units:
        conduit.add_entry(TYPE_ID, key, location)

    catalog = ContentCatalog.get_collection()
    total = len(units)
    self.assertEqual(total, catalog.find().count())

    # Confirm the target entry exists with the expected content first.
    target_key, target_url = units[5]
    target_locator = ContentCatalog.get_locator(TYPE_ID, target_key)
    stored = catalog.find_one({"locator": target_locator})
    for field, expected in (("type_id", TYPE_ID),
                            ("unit_key", target_key),
                            ("url", target_url)):
        self.assertEqual(stored[field], expected)

    conduit.delete_entry(TYPE_ID, target_key)

    # One fewer document; additions unchanged; exactly one deletion.
    self.assertEqual(total - 1, catalog.find().count())
    self.assertEqual(conduit.added_count, total)
    self.assertEqual(conduit.deleted_count, 1)
    remaining = catalog.find_one({"locator": target_locator})
    self.assertTrue(remaining is None)
def test_delete(self):
    """
    Populate the catalog through a cataloger conduit, remove the unit at
    index 5 of ``self.units(0, 10)``, then confirm the document count, the
    conduit counters and the absence of the removed entry.
    """
    units = self.units(0, 10)
    conduit = CatalogerConduit(SOURCE_ID, EXPIRES)
    for unit_key, url in units:
        conduit.add_entry(TYPE_ID, unit_key, url)

    db_collection = ContentCatalog.get_collection()
    unit_count = len(units)
    self.assertEqual(unit_count, db_collection.find().count())

    removed_key, removed_url = units[5]
    removed_locator = ContentCatalog.get_locator(TYPE_ID, removed_key)

    # Before the delete: the entry is present and matches what was added.
    before = db_collection.find_one({'locator': removed_locator})
    self.assertEqual(before['type_id'], TYPE_ID)
    self.assertEqual(before['unit_key'], removed_key)
    self.assertEqual(before['url'], removed_url)

    conduit.delete_entry(TYPE_ID, removed_key)

    # After the delete: one fewer document and the entry can't be found.
    self.assertEqual(unit_count - 1, db_collection.find().count())
    self.assertEqual(conduit.added_count, unit_count)
    self.assertEqual(conduit.deleted_count, 1)
    after = db_collection.find_one({'locator': removed_locator})
    self.assertTrue(after is None)
def find(self, type_id, unit_key):
    """
    Find entries in the content catalog using the specified unit
    type_id and unit_key.

    The catalog may contain more than one entry matching the locator for a
    given content source.  In this case, only the newest entry for each
    source is included in the result set.

    :param type_id: The unit type ID.
    :type type_id: str
    :param unit_key: The unit key.
    :type unit_key: dict
    :return: A list of matching entries; at most one per source_id.
    :rtype: list
    """
    collection = ContentCatalog.get_collection()
    locator = ContentCatalog.get_locator(type_id, unit_key)
    # Match on the locator and keep only entries whose expiration is at or
    # after get_expiration(0).
    # NOTE(review): get_expiration(0) is presumably "now" - confirm.
    query = {
        'locator': locator,
        'expiration': {'$gte': ContentCatalog.get_expiration(0)},
    }
    newest_by_source = {}
    # Ascending _id order means the last entry seen for a source_id wins.
    for entry in collection.find(query, sort=[('_id', ASCENDING)]):
        newest_by_source[entry['source_id']] = entry
    # Return a real list: dict.values() is only a view on Python 3, which
    # would contradict the documented return type.
    return list(newest_by_source.values())