def test_remove_child_from_extended_path_index(self):
    """Surgery on a childless object's rid cleans all path index data."""
    child_rid = self.get_rid(self.child_folder)
    path_index = self.catalog.indexes['path']

    # --- state before the surgery ---
    # _index
    # plone site at level 0
    self.assertIn(child_rid, path_index._index['plone'][0])
    # parent id at level 1
    self.assertIn(child_rid, path_index._index['foo'][1])
    # object id at level 2
    self.assertIn(child_rid, path_index._index['child'][2])
    # terminator at level 2
    self.assertIn(child_rid, path_index._index[None][2])
    # 3 objects, plone, terminator
    self.assertEqual(5, len(path_index._index))

    # _index_items
    item_keys = find_keys_pointing_to_rid(
        path_index._index_items, child_rid)
    self.assertEqual(1, len(item_keys))
    self.assertEqual(3, len(path_index._index_items))

    # _index_parents
    parent_keys = find_keys_pointing_to_rid(
        path_index._index_parents, child_rid)
    self.assertEqual(1, len(parent_keys))
    self.assertEqual(2, len(path_index._index_parents))

    # _unindex
    self.assertIn(child_rid, path_index._unindex)
    self.assertEqual(3, len(path_index._unindex))

    # index stats
    self.assertEqual(3, len(path_index))

    RemoveFromExtendedPathIndex(path_index, child_rid).perform()

    # --- state after the surgery ---
    # _index
    self.assertNotIn(child_rid, path_index._index['plone'][0])
    self.assertNotIn(child_rid, path_index._index['foo'][1])
    # the 'child' component had no other rids, so it is gone entirely
    self.assertNotIn('child', path_index._index)
    # the level 2 terminator had no other entries, so it is gone as well
    self.assertNotIn(2, path_index._index[None])
    self.assertEqual(4, len(path_index._index))

    # _index_items
    item_keys = find_keys_pointing_to_rid(
        path_index._index_items, child_rid)
    self.assertEqual(0, len(item_keys))
    self.assertEqual(2, len(path_index._index_items))

    # _index_parents
    parent_keys = find_keys_pointing_to_rid(
        path_index._index_parents, child_rid)
    self.assertEqual(0, len(parent_keys))
    # '/plone' is the only remaining parent
    self.assertEqual(1, len(path_index._index_parents))

    # _unindex
    self.assertNotIn(child_rid, path_index._unindex)
    self.assertEqual(2, len(path_index._unindex))

    # index stats
    self.assertEqual(2, len(path_index))
def test_remove_parent_from_extended_path_index(self):
    """Surgery on a parent's rid keeps the entries its child relies on."""
    parent_rid = self.get_rid(self.folder)
    path_index = self.catalog.indexes['path']

    # --- state before the surgery ---
    # _index
    # plone site at level 0
    self.assertIn(parent_rid, path_index._index['plone'][0])
    # object id at level 1
    self.assertIn(parent_rid, path_index._index['foo'][1])
    # terminator at level 1
    self.assertIn(parent_rid, path_index._index[None][1])
    # 3 objects, plone, terminator
    self.assertEqual(5, len(path_index._index))

    # _index_items
    item_keys = find_keys_pointing_to_rid(
        path_index._index_items, parent_rid)
    self.assertEqual(1, len(item_keys))
    self.assertEqual(3, len(path_index._index_items))

    # _index_parents
    parent_keys = find_keys_pointing_to_rid(
        path_index._index_parents, parent_rid)
    self.assertEqual(1, len(parent_keys))
    self.assertEqual(2, len(path_index._index_parents))

    # _unindex
    self.assertIn(parent_rid, path_index._unindex)
    self.assertEqual(3, len(path_index._unindex))

    # index stats
    self.assertEqual(3, len(path_index))

    RemoveFromExtendedPathIndex(path_index, parent_rid).perform()

    # --- state after the surgery ---
    # _index
    self.assertNotIn(parent_rid, path_index._index['plone'][0])
    # the 'foo' component itself stays because it still has children
    self.assertNotIn(parent_rid, path_index._index['foo'][1])
    self.assertNotIn(parent_rid, path_index._index[None][1])
    # size is unchanged as the removed object has children
    self.assertEqual(5, len(path_index._index))

    # _index_items
    item_keys = find_keys_pointing_to_rid(
        path_index._index_items, parent_rid)
    self.assertEqual(0, len(item_keys))
    self.assertEqual(2, len(path_index._index_items))

    # _index_parents
    parent_keys = find_keys_pointing_to_rid(
        path_index._index_parents, parent_rid)
    self.assertEqual(0, len(parent_keys))
    self.assertEqual(2, len(path_index._index_parents))

    # _unindex
    self.assertNotIn(parent_rid, path_index._unindex)
    self.assertEqual(2, len(path_index._unindex))

    # index stats
    self.assertEqual(2, len(path_index))
def test_unindex_find_keys_pointing_to_rid(self):
    """An `UnIndex` can be handed to the lookup helper directly."""
    indexed_obj = Mock()
    indexed_obj.foo = 'a key'
    unindex = UnIndex('foo')
    unindex.index_object(-12, indexed_obj)

    found_keys = find_keys_pointing_to_rid(unindex, -12)
    self.assertItemsEqual(['a key'], found_keys)
def test_remove_healthy_object(self):
    """Removing a consistently indexed rid empties the UID index."""
    folder_rid = self.get_rid(self.folder)
    uid_index = self.catalog.indexes['UID']

    # before: forward and backward index both know the rid
    forward_keys = find_keys_pointing_to_rid(uid_index._index, folder_rid)
    self.assertEqual(1, len(forward_keys))
    self.assertIn(folder_rid, uid_index._unindex)
    self.assertEqual(1, len(uid_index))

    RemoveFromUUIDIndex(uid_index, folder_rid).perform()

    # after: no trace of the rid is left
    forward_keys = find_keys_pointing_to_rid(uid_index._index, folder_rid)
    self.assertEqual(0, len(forward_keys))
    self.assertNotIn(folder_rid, uid_index._unindex)
    self.assertEqual(0, len(uid_index))
def perform(self):
    """Remove `self.rid` from all data structures of the path index.

    Cleans `_index`, `_index_items`, `_index_parents` and `_unindex` of
    `self.index` and decreases the index length by one.

    """
    rid = self.rid
    path_index = self.index

    # _index: collect the hits first so the trees are not modified while
    # they are being iterated.
    hits = [
        (component, level)
        for component, levels in path_index._index.items()
        for level, rids in levels.items()
        if rid in rids
    ]
    for component, level in hits:
        levels = path_index._index[component]
        levels[level].remove(rid)
        # drop containers that became empty
        if not levels[level]:
            del levels[level]
        if not levels:
            del path_index._index[component]

    # _index_items
    for path in find_keys_pointing_to_rid(path_index._index_items, rid):
        del path_index._index_items[path]

    # _index_parents
    self._remove_keys_pointing_to_rid(path_index._index_parents)

    # _unindex
    if rid in path_index._unindex:
        del path_index._unindex[rid]

    # NOTE(review): the flattened source does not show whether this
    # decrement was nested under the `_unindex` check above; it is kept
    # unconditional here -- confirm against the original file.
    path_index._length.change(-1)
def test_remove_from_forward_index_only(self):
    """Surgery works when the rid is missing from the backward index."""
    folder_rid = self.get_rid(self.folder)
    uid_index = self.catalog.indexes['UID']
    # break the index: only the forward index keeps the rid
    del uid_index._unindex[folder_rid]

    forward_keys = find_keys_pointing_to_rid(uid_index._index, folder_rid)
    self.assertEqual(1, len(forward_keys))
    self.assertEqual(1, len(uid_index._index))

    RemoveFromUUIDIndex(uid_index, folder_rid).perform()

    forward_keys = find_keys_pointing_to_rid(uid_index._index, folder_rid)
    self.assertEqual(0, len(forward_keys))
    self.assertNotIn(folder_rid, uid_index._unindex)
    self.assertEqual(0, len(uid_index))
def test_remove_healthy_object_from_dateindex(self):
    """A healthy rid is removed completely from a `DateIndex`."""
    folder_rid = self.get_rid(self.folder)
    date_index = self.catalog.indexes['modified']
    self.assertIs(DateIndex, type(date_index))

    # before: one forward entry plus the backward entry
    keys_before = find_keys_pointing_to_rid(date_index, folder_rid)
    self.assertEqual(1, len(keys_before))
    self.assertIn(folder_rid, date_index._unindex)
    self.assertEqual(1, len(date_index))

    RemoveFromUnIndex(date_index, folder_rid).perform()

    # after: the index is empty
    keys_after = find_keys_pointing_to_rid(date_index, folder_rid)
    self.assertEqual(0, len(keys_after))
    self.assertNotIn(folder_rid, date_index._unindex)
    self.assertEqual(0, len(date_index))
def test_remove_healthy_object_from_keywordindex(self):
    """A healthy rid is removed completely from a `KeywordIndex`."""
    folder_rid = self.get_rid(self.folder)
    keyword_index = self.catalog.indexes['object_provides']
    self.assertIs(KeywordIndex, type(keyword_index))

    # before: at least one keyword row points to the rid
    keys_before = find_keys_pointing_to_rid(keyword_index, folder_rid)
    self.assertGreater(len(keys_before), 0)
    self.assertIn(folder_rid, keyword_index._unindex)
    self.assertGreater(len(keyword_index), 0)

    RemoveFromUnIndex(keyword_index, folder_rid).perform()

    # after: the index is empty
    keys_after = find_keys_pointing_to_rid(keyword_index, folder_rid)
    self.assertEqual(0, len(keys_after))
    self.assertNotIn(folder_rid, keyword_index._unindex)
    self.assertEqual(0, len(keyword_index))
def test_remove_from_reverse_index_only(self):
    """Surgery works when the rid is missing from the forward index."""
    folder_rid = self.get_rid(self.folder)
    uid_index = self.catalog.indexes['UID']

    # break the index: drop the forward entry and adjust the length so
    # only the backward index keeps the rid
    forward_keys = find_keys_pointing_to_rid(uid_index._index, folder_rid)
    del uid_index._index[forward_keys[0]]
    uid_index._length.change(-1)

    self.assertIn(folder_rid, uid_index._unindex)
    self.assertEqual(1, len(uid_index._unindex))

    RemoveFromUUIDIndex(uid_index, folder_rid).perform()

    forward_keys = find_keys_pointing_to_rid(uid_index._index, folder_rid)
    self.assertEqual(0, len(forward_keys))
    self.assertNotIn(folder_rid, uid_index._unindex)
    self.assertEqual(0, len(uid_index))
def test_btrees_find_keys_in_pointing_to_rid(self):
    """The lookup helper handles an `OOBTree` holding `IITreeSet` rows."""
    rows = {
        'foo': IITreeSet((5, -17, 43)),
        'bar': IITreeSet(),
        'somekey': IITreeSet((-17, 1)),
    }
    tree = OOBTree(rows)

    found_keys = find_keys_pointing_to_rid(tree, -17)
    self.assertItemsEqual(['foo', 'somekey'], found_keys)
def test_remove_from_daterange_index_always(self):
    """A rid stored in `_always` is removed from the date range index."""
    self.set_effective_range(None, None)
    folder_rid = self.get_rid(self.folder)
    range_index = self.catalog.indexes['effectiveRange']

    # before: the rid lives in `_always` and in none of the date rows
    self.assertIn(folder_rid, range_index._always)
    since_only = find_keys_pointing_to_rid(
        range_index._since_only, folder_rid)
    self.assertEqual(0, len(since_only))
    until_only = find_keys_pointing_to_rid(
        range_index._until_only, folder_rid)
    self.assertEqual(0, len(until_only))
    since = find_keys_pointing_to_rid(range_index._since, folder_rid)
    self.assertEqual(0, len(since))
    until = find_keys_pointing_to_rid(range_index._until, folder_rid)
    self.assertEqual(0, len(until))
    self.assertIn(folder_rid, range_index._unindex)

    RemoveFromDateRangeIndex(range_index, folder_rid).perform()

    self.assertNotIn(folder_rid, range_index._always)
    self.assertNotIn(folder_rid, range_index._unindex)
def test_remove_healthy_object_from_daterecurringindex(self):
    """A healthy rid is removed completely from a `DateRecurringIndex`."""
    # pretend the folder is something that supports recurring dates
    self.folder.start = date(2010, 1, 1)
    self.folder.recurrence = 'FREQ=DAILY;INTERVAL=1;COUNT=5'
    self.reindex_object(self.folder)

    folder_rid = self.get_rid(self.folder)
    recurring_index = self.catalog.indexes['start']
    self.assertIs(DateRecurringIndex, type(recurring_index))

    # before: five forward rows point to the rid
    keys_before = find_keys_pointing_to_rid(recurring_index, folder_rid)
    self.assertEqual(5, len(keys_before))
    self.assertIn(folder_rid, recurring_index._unindex)
    self.assertEqual(5, len(recurring_index))

    RemoveFromUnIndex(recurring_index, folder_rid).perform()

    # after: the index is empty
    keys_after = find_keys_pointing_to_rid(recurring_index, folder_rid)
    self.assertEqual(0, len(keys_after))
    self.assertNotIn(folder_rid, recurring_index._unindex)
    self.assertEqual(0, len(recurring_index))
def _remove_keys_pointing_to_rid(self, index, linked_length=None):
    """Remove all entries pointing to rid from a forward index.

    Rows in indices are expected to be a set, e.g. a `TreeSet`. Once the set
    is empty it should also be removed from the index. If `linked_length` is
    provided it is decreased when a row is removed.

    :param index: mapping of key to a set-like row of rids; modified in
        place.
    :param linked_length: optional object offering `change(delta)`, called
        with `-1` for every row that gets deleted.

    """
    for key in find_keys_pointing_to_rid(index, self.rid):
        row = index[key]
        row.remove(self.rid)
        if not row:
            del index[key]
            # Compare against None explicitly: a supplied length object
            # must be updated even if it happens to evaluate as falsy.
            if linked_length is not None:
                linked_length.change(-1)
def test_find_keys_pointing_to_rid_emtpy_result(self):
    """No keys are reported when no row contains the rid."""
    rows = {'foo': [1, 77], 'qux': []}

    found_keys = find_keys_pointing_to_rid(rows, 1337)
    self.assertItemsEqual([], found_keys)
def test_find_keys_pointing_to_rid_single_value(self):
    """Rows may hold a single rid instead of a collection of rids."""
    rows = {'foo': 1, 'bar': -23}

    found_keys = find_keys_pointing_to_rid(rows, -23)
    self.assertItemsEqual(['bar'], found_keys)
def test_make_unhealthy_extra_rid_after_move(self):
    """Selftest that broken rids are created correctly.

    Apparently the problem surfaces only with plone < 5. Document in what
    way the catalog is broken when an extra rid is created. This has been
    verified against productive deployments where this issue is present.

    The test first checks the catalog level data and the UID index, then
    walks over every catalog index and asserts per index type how the extra
    rid shows up, and finally checks the symptoms the healthcheck reports
    for the extra rid.

    """
    self.make_unhealthy_extra_rid_after_move(self.child)

    result = self.run_healthcheck()
    self.assertFalse(result.is_healthy())
    self.assertEqual(1, len(result.unhealthy_rids))
    extra_rid = result.get_unhealthy_rids()[0].rid

    # Catalog level: the extra rid is known as a path key but no uid maps
    # to it.
    self.assertTrue(extra_rid in self.catalog.paths)
    self.assertFalse(extra_rid in self.catalog.uids.values())

    # UID index: the extra rid is in the backward index only.
    uuid_index = self.catalog.indexes['UID']
    self.assertTrue(extra_rid in uuid_index._unindex)
    self.assertFalse(extra_rid in uuid_index._index.values())

    for name, index in self.catalog.indexes.items():
        # Purposefully don't use isinstance to avoid being bitten by
        # subclasses that change how the index behaves or add additional
        # internal data structures which we would not cover here, e.g.:
        # `ExtendedPathIndex` vs. `PathIndex`.
        if index.__class__ in (FieldIndex, DateIndex, DateRecurringIndex, KeywordIndex):
            # These indices seem to consistently contain the extra rid, i.e.
            # it is left behind in the forward index and also in the
            # backward indices.
            rows_with_rid = find_keys_pointing_to_rid(
                index._index, extra_rid)
            # Check both directions: forward rows imply a backward entry
            # and a backward entry implies at least one forward row.
            if rows_with_rid:
                self.assertIn(extra_rid, index._unindex)
            if extra_rid in index._unindex:
                self.assertGreaterEqual(len(rows_with_rid), 1)
        elif index.__class__ == ZCTextIndex:
            # Our broken object test provides values for all ZCTextIndex
            # indices. All ZCTextIndex indices that contain the extra rid
            # seem to contain it consistently.
            self.assertTrue(index.index.has_doc(extra_rid))
        elif index.__class__ == UUIDIndex:
            # We expect only one UUIDIndex and we have already handled it
            # explicitly above
            if name != 'UID':
                self.fail('Unexpected uuid index: {}'.format(index))
        elif index.__class__ == DateRangeIndex:
            # The index seems to be consistent, forward and backward
            # indices contain the extra rid.
            self.assertTrue(extra_rid in index._unindex)
            self.assertTrue(
                any((
                    # _always: [rid]
                    extra_rid in index._always,
                    # all other extra indices provide: {date: [rid]}
                    find_keys_pointing_to_rid(index._since_only, extra_rid),
                    find_keys_pointing_to_rid(index._until_only, extra_rid),
                    find_keys_pointing_to_rid(index._since, extra_rid),
                    find_keys_pointing_to_rid(index._until, extra_rid),
                )))
        elif index.__class__ == BooleanIndex:
            # The index seems to be consistent, forward and backward
            # indices contain the extra rid.
            self.assertIn(extra_rid, index._unindex)
            # The forward index is only checked when the stored value
            # equals `_index_value`.
            if index._unindex[extra_rid] == index._index_value:
                self.assertIn(extra_rid, index._index)
        elif index.__class__ == ExtendedPathIndex:
            # The index seems to be consistent, forward and backward
            # indices contain the extra rid.

            # _unindex: {rid: path}
            self.assertIn(extra_rid, index._unindex)
            # _index_items: {path: rid}
            self.assertIn(extra_rid, index._index_items.values())
            # _index_parents: {path: [rid]} (path to rid of children)
            paths_with_rid_as_child = find_keys_pointing_to_rid(
                index._index_parents, extra_rid)
            self.assertEqual(1, len(paths_with_rid_as_child))
            # _index: {component: {level: [rid]}} (component to level to rid)
            components_with_rid = [
                component
                for component, level_to_rid in index._index.items()
                if any(extra_rid in rids
                       for level, rids in level_to_rid.items())
            ]
            self.assertGreaterEqual(len(components_with_rid), 1)
        elif index.__class__ == GopipIndex:
            # This isn't a real index.
            pass
        else:
            # Fail loudly so that new index types get covered explicitly.
            self.fail('Unhandled index type: {}'.format(index))

    # Symptoms reported by the healthcheck for the extra rid.
    self.assertEqual((
        'in_metadata_keys_not_in_uids_values',
        'in_paths_keys_not_in_uids_values',
        'in_uuid_unindex_not_in_catalog',
        'in_uuid_unindex_not_in_uuid_index',
        'uids_tuple_mismatches_paths_tuple',
    ), result.get_symptoms(extra_rid))
def get_extended_index_data(index, rid):
    """Collect everything an index stores for `rid`.

    Depending on the index type the result contains the backward index
    entry, the forward index entries and entries of helper indexes.

    :param index: the catalog index to inspect.
    :param rid: the record id to look up.
    :returns: the string `'<UNSUPPORTED>'` for a `GopipIndex`, otherwise a
        dict keyed by the name of the internal data structure. The dict is
        empty for index types not handled here.

    """
    if isinstance(index, GopipIndex):
        return '<UNSUPPORTED>'

    def backward_entry():
        # {rid: value} if the backward index knows the rid, else {}
        stored = index._unindex.get(rid, _marker)
        if stored is _marker:
            return {}
        return {rid: stored}

    def forward_entries(mapping):
        # {key: rid} for every key of `mapping` pointing to the rid
        return dict.fromkeys(find_keys_pointing_to_rid(mapping, rid), rid)

    data = {}
    if isinstance(index, PathIndex):
        data['unindex'] = backward_entry()
        data['index'] = {
            (component, level): rid
            for component, levels in index._index.items()
            for level, rids in levels.items()
            if rid in rids
        }
        if isinstance(index, ExtendedPathIndex):
            data['index_items'] = forward_entries(index._index_items)
            data['index_parents'] = forward_entries(index._index_parents)

    elif isinstance(index, ZCTextIndex):
        # Only expose the word ids stored for the rid to indicate that it
        # is present in the index; the strings behind the word ids are not
        # looked up.
        docwords = {}
        if index.index.has_doc(rid):
            docwords[rid] = index.index.get_words(rid)
        data['docwords'] = docwords

    elif isinstance(index, DateRangeIndex):
        data['always'] = [rid] if rid in index._always else []
        for name in ('since_only', 'until_only', 'since', 'until'):
            data[name] = forward_entries(getattr(index, '_' + name))
        data['unindex'] = backward_entry()

    elif isinstance(index, BooleanIndex):
        # `_index` is special and only contains either `True` or `False`
        # values, so only `_unindex` is of interest.
        data['unindex'] = backward_entry()

    elif isinstance(index, UnIndex):
        data['unindex'] = backward_entry()
        data['index'] = forward_entries(index._index)

    return data
def _remove_keys_pointing_to_rid(self, index, linked_length=None):
    """Delete every key of `index` that points to `self.rid`.

    The length of `self.index` is decreased by one per deleted key. The
    `linked_length` argument is accepted but not used by this variant.

    """
    stale_keys = find_keys_pointing_to_rid(index, self.rid)
    for stale_key in stale_keys:
        del index[stale_key]
        self.index._length.change(-1)
def test_find_keys_pointing_to_rid(self):
    """All keys whose row contains the rid are reported."""
    rows = {
        'foo': [1, 77, 678],
        'bar': [3, 77],
        'qux': [],
    }

    found_keys = find_keys_pointing_to_rid(rows, 77)
    self.assertItemsEqual(['foo', 'bar'], found_keys)