def reindex_catalog(zport, permissionsOnly=False, printProgress=True, commit=True):
    """Re-run every object referenced by the global catalog through indexing.

    Walks all brains currently in ``zport.global_catalog``; for each one the
    object is reloaded and re-cataloged.  Brains whose path can no longer be
    loaded (KeyError) or resolves to None are removed from the catalog.

    :param zport: portal object providing the ``global_catalog`` attribute
    :param permissionsOnly: if True, refresh only the
        ``allowedRolesAndUsers`` index and skip metadata updates
    :param printProgress: if True, write a dot to stdout every
        PROGRESS_CHUNK_SIZE objects; otherwise emit a progress log line
    :param commit: if True, commit the transaction after each progress
        chunk and once more at the end
    """
    globalCat = zport.global_catalog
    # Cache catalog lookups for the duration of the walk.
    with catalog_caching():
        msg = 'objects'
        if permissionsOnly:
            msg = 'permissions'
        log.info('reindexing %s in catalog' % msg)
        i = 0
        # Hoist the bound method out of the loop.
        catObj = globalCat.catalog_object
        for brain in globalCat():
            log.debug('indexing %s' % brain.getPath())
            try:
                obj = brain.getObject()
            except KeyError:
                # The path no longer resolves: drop the stale entry.
                log.debug("Could not load object. Deleting: %s", brain.getPath())
                globalCat.uncatalog_object(brain.getPath())
                continue
            except Exception:
                # Best-effort walk: skip objects that fail to load.
                log.warn("Unexpected exception loading object %s. Skipping!",
                         brain.getPath(), exc_info=True)
                continue
            if obj is not None:
                # Empty kwargs means 'idxs' is unset, which defaults to
                # re-running all indexes.
                kwargs = {}
                if permissionsOnly:
                    kwargs = {'update_metadata': False,
                              'idxs': ("allowedRolesAndUsers",)}
                elif hasattr(obj, 'index_object'):
                    # Legacy per-object indexing hook.
                    obj.index_object()
                catObj(obj, **kwargs)
                log.debug('Catalogued object %s' % obj.absolute_url_path())
            else:
                # Loaded fine but resolved to None: treat as gone.
                log.debug('%s does not exists' % brain.getPath())
                globalCat.uncatalog_object(brain.getPath())
            i += 1
            if not i % PROGRESS_CHUNK_SIZE:
                if printProgress:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                else:
                    log.info('Catalogued %s objects' % i)
                # Commit per chunk so the work isn't lost on interruption.
                if commit:
                    transaction.commit()
        if printProgress:
            sys.stdout.write('\n')
            sys.stdout.flush()
        if commit:
            transaction.commit()
def reindex_catalog(zport, permissionsOnly=False, printProgress=True, commit=True):
    """Re-catalog every object currently referenced by the global catalog.

    Stale entries (paths that raise KeyError or resolve to None) are
    uncataloged.  When ``permissionsOnly`` is set, only the
    allowedRolesAndUsers index is refreshed and metadata is left alone.
    Progress is shown as dots on stdout (``printProgress``) or as log lines,
    and the transaction is committed per progress chunk and at the end when
    ``commit`` is true.
    """
    catalog = zport.global_catalog
    with catalog_caching():
        msg = 'permissions' if permissionsOnly else 'objects'
        log.info('reindexing %s in catalog' % msg)
        count = 0
        recatalog = catalog.catalog_object  # hoist bound method out of the loop
        for brain in catalog():
            path = brain.getPath()
            log.debug('indexing %s' % path)
            try:
                obj = brain.getObject()
            except KeyError:
                # Path no longer resolves -- remove the stale entry.
                log.debug("Could not load object. Deleting: %s", path)
                catalog.uncatalog_object(path)
                continue
            except Exception:
                # Best-effort: skip objects that fail to load for other reasons.
                log.warn("Unexpected exception loading object %s. Skipping!",
                         path, exc_info=True)
                continue
            if obj is None:
                log.debug('%s does not exists' % path)
                catalog.uncatalog_object(path)
            else:
                kwargs = {}
                if permissionsOnly:
                    kwargs = {'update_metadata': False,
                              'idxs': ("allowedRolesAndUsers",)}
                elif hasattr(obj, 'index_object'):
                    obj.index_object()  # legacy per-object indexing hook
                recatalog(obj, **kwargs)
                log.debug('Catalogued object %s' % obj.absolute_url_path())
            count += 1
            if count % PROGRESS_CHUNK_SIZE == 0:
                if printProgress:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                else:
                    log.info('Catalogued %s objects' % count)
                if commit:
                    transaction.commit()
        if printProgress:
            sys.stdout.write('\n')
            sys.stdout.flush()
        if commit:
            transaction.commit()
def run(self):
    """Command entry point.

    Validates the command-line options, schedules the requested catalog
    operation (--createcatalog or --reindex) to run once the Twisted
    reactor starts, and blocks in ``reactor.run()`` until the operation's
    deferred fires and stops the reactor.
    """
    def stop(ignored):
        # Callback/errback: shut the reactor down whether the work
        # succeeded or failed.
        reactor.stop()

    def main():
        zport = self.dmd.getPhysicalRoot().zport
        if self.options.createcatalog:
            d = self._createCatalog(zport)
        elif self.options.reindex:
            d = self._reindex(zport)
        # NOTE: 'd' is only bound inside the branches above; this is safe
        # because the option check below runs before main() is invoked.
        d.addBoth(stop)

    if not self.options.createcatalog and not self.options.reindex:
        # parser.error() prints usage and exits the process.
        self.parser.error("Must use one of --createcatalog, --reindex")
    reactor.callWhenRunning(main)
    # Keep the catalog cache active for the lifetime of the reactor loop.
    with catalog_caching():
        reactor.run()
def run(self):
    """Entry point: run catalog creation or reindexing under the Twisted
    reactor, stopping the reactor once the operation's deferred fires.

    Exits via ``parser.error`` when neither --createcatalog nor --reindex
    was requested.
    """
    # Guard clause: bail out early on invalid options (parser.error exits).
    if not (self.options.createcatalog or self.options.reindex):
        self.parser.error("Must use one of --createcatalog, --reindex")

    def _halt(result):
        # Fires on success or failure alike.
        reactor.stop()

    def _kickoff():
        zport = self.dmd.getPhysicalRoot().zport
        if self.options.createcatalog:
            deferred = self._createCatalog(zport)
        elif self.options.reindex:
            deferred = self._reindex(zport)
        deferred.addBoth(_halt)

    reactor.callWhenRunning(_kickoff)
    # Catalog caching stays active while the reactor loop runs.
    with catalog_caching():
        reactor.run()
def convert_into_document(worker_id, inbox, outbox, buffer_size, permissions_only, print_progress=None):
    """Worker-process loop: index objects and emit catalog "documents".

    Pulls primary paths from ``inbox`` (a queue), loads each object from the
    ZODB, runs it through the global catalog's indexes, and pushes the
    resulting (documentId, uid, index values, metadata) tuple to ``outbox``.
    A tuple of ``(None, path, None, None)`` on ``outbox`` means "uncatalog
    this path"; a ``None`` read from ``inbox`` terminates the loop.

    The catalog index classes are monkey-patched (process-wide) so that
    indexing *captures* the computed values into local scratch lists instead
    of writing them into the real index data structures.

    :param worker_id: identifier of this worker process
    :param inbox: queue of primary paths to process; None ends the loop
    :param outbox: queue receiving document tuples / uncatalog markers
    :param buffer_size: unused in this body -- TODO confirm caller intent
    :param permissions_only: if True, run only the allowedRolesAndUsers index
    :param print_progress: optional callable handed to ProgressCounter
    """
    ignore_interruptions()
    drop_all_arguments()
    zc = ZenCatalog()
    dmd = zc.dmd
    zport = dmd.getPhysicalRoot().zport
    catalog = dmd.global_catalog
    # Scratch buffers filled by the monkey-patched methods below and
    # drained/cleared after each object.
    vals = []          # (index id, value, unindex value) triples
    documentIds = []   # document ids seen by ManagableIndex.index_object
    uids = []          # uids seen by Catalog.catalogObject
    counter = ProgressCounter(print_progress=print_progress)

    # Apply monkey patches ...
    def index_object(self, documentId, obj, threshold=None):
        # Replacement for ManagableIndex.index_object: record the evaluated
        # value instead of storing it in the index.
        val = self._evaluate(obj)
        cuv = self._val2UnindexVal
        if val is not None and cuv is not None:
            unindexVal = cuv(val)
        else:
            unindexVal = val
        documentIds.append(documentId)
        if val is not None:
            vals.append((self.id, val, unindexVal))
            return 1
        return 0
    ManagableIndex.index_object = index_object

    def mpi_index_object(self, docId, obj, threshold=None):
        # Replacement for MultiPathIndex.index_object: capture the object's
        # path(s) rather than indexing them.
        f = getattr(obj, self.id, None)
        if f is not None:
            if safe_callable(f):
                try:
                    paths = f()
                except AttributeError:
                    return 0
            else:
                paths = f
        else:
            try:
                paths = obj.getPhysicalPath()
            except AttributeError:
                return 0
        if paths:
            paths = _recursivePathSplit(paths)
            if not _isSequenceOfSequences(paths):
                paths = [paths]
            vals.append((self.id, paths, None))
            return 1
        return 0
    MultiPathIndex.index_object = mpi_index_object

    orig_catalogObject = Catalog.catalogObject
    def catalogObject(self, object, uid, threshold=None, idxs=None, update_metadata=1):
        # Wrap Catalog.catalogObject only to remember the uid being indexed.
        uids.append(uid)
        return orig_catalogObject(self, object, uid, threshold, idxs, update_metadata)
    Catalog.catalogObject = catalogObject

    def convertToDocument(primary_path):
        # Index one object; the while/continue structure re-runs the whole
        # operation after a database reconnect.
        while True:
            try:
                try:
                    obj = dmd.unrestrictedTraverse(primary_path)
                except (AttributeError, ClientDisconnected, DisconnectedError):
                    # Connection trouble: handled by the outer except below.
                    raise
                except KeyError:
                    log.debug("Could not load object: %s", primary_path)
                    put_or_die(outbox, (None, primary_path, None, None)) # uncatalog
                    counter.increment()
                    # NOTE(review): this 'continue' restarts the while loop
                    # and re-attempts the same traversal; if the KeyError is
                    # persistent this retries forever -- confirm intent.
                    continue
                except Exception:
                    log.warn("Unexpected exception loading object %s. Skipping!",
                             primary_path, exc_info=True)
                    counter.increment()
                    # NOTE(review): same retry concern as the KeyError branch.
                    continue
                if obj is None:
                    log.debug("%s does not exist", primary_path)
                    put_or_die(outbox, (None, primary_path, None, None)) # uncatalog
                    counter.increment()
                    continue
                if permissions_only:
                    catalog.catalog_object(obj, update_metadata=False,
                                           idxs=("allowedRolesAndUsers",))
                else:
                    # We intentionally don't do legacy indexing:
                    # if hasattr(obj, 'index_object'): obj.index_object()
                    catalog.catalog_object(obj)
                if documentIds:
                    # The monkey patches populated the buffers; emit one
                    # document tuple (copies, since 'finally' clears them).
                    uid = uids.pop()
                    documentId = documentIds[0]
                    metadata = catalog._catalog.data.get(documentId, {})
                    put_or_die(outbox, (documentId, uid[:], vals[:], metadata))
                counter.increment()
            except (AttributeError, ClientDisconnected, DisconnectedError):
                reconnect(zc)
                continue
            finally:
                # clear lists
                vals[:] = []
                uids[:] = []
                documentIds[:] = []
                # Invalidation allows object to be garbage collected
                # NOTE(review): 'obj' is unbound here when the traversal
                # itself raised; the resulting NameError is swallowed by the
                # caller's broad except -- confirm this is acceptable.
                inv = getattr(obj, '_p_invalidate', None)
                if inv is not None:
                    inv()
            break

    # Process my inbox ...
    with catalog_caching():
        tick = time.time()
        while True:
            try:
                # Periodically abort the (read-only) transaction so ZODB can
                # release cached objects.
                if (counter.count % 100 == 0) and (time.time() - tick > 5.0):
                    transaction.abort() # Allow garbage collection
                    tick = time.time()
                primary_path = inbox.get_nowait()
            except Empty:
                check_for_dead_parent()
                time.sleep(0.1)
                continue
            if primary_path is None:
                break # End of inbox. We're done here.
            try:
                convertToDocument(primary_path)
            except Exception:
                log.info("Error indexing object %s. Skipping.", primary_path,
                         exc_info=log.isEnabledFor(logging.DEBUG))
def source_from_zport(control, outbox, resume, print_progress=None):
    """Producer process: walk the whole zport object tree and feed paths.

    Recursively traverses the object hierarchy under ``zport`` and puts each
    visited object's primary path on ``outbox`` for the indexing workers.
    Progress is tracked in a call tree (a RecursiveDefaultDict whose finished
    subtrees are marked False), which is pickled to CALL_TREE_DUMP_FILE on
    KeyboardInterrupt so a later run with ``resume=True`` can continue.

    :param control: handle polled by check_for_interrupt() for stop requests
    :param outbox: queue receiving primary-path tuples
    :param resume: if True, try to reload a previously pickled call tree
    :param print_progress: optional callable handed to ProgressCounter
    """
    ignore_interruptions()
    drop_all_arguments()
    zc = ZenCatalog()
    zport = zc.dmd.getPhysicalRoot().zport
    counter = ProgressCounter(print_progress=print_progress)

    def find_kids(obj, call_tree):
        # Yield the non-None children produced by recurse(); log one error
        # if any None children were encountered under this object.
        nones_counted = 0
        for kid in recurse(obj, call_tree):
            if kid is None:
                nones_counted += 1
            else:
                yield kid
        if nones_counted:
            try:
                description = obj.getPrimaryPath()
            except Exception:
                description = repr(obj)
            log.error("Object %s has a None child!" % description)

    def recurse(obj, call_tree=None):
        # Depth-first generator over the object tree.  The while/continue
        # structure restarts this node after a database reconnect, and
        # call_tree[obj_id] = False marks a fully-emitted subtree so resumed
        # runs skip it.
        if call_tree is None:
            call_tree = RecursiveDefaultDict()
        while True:
            check_for_dead_parent()
            check_for_interrupt(control)
            try:
                obj_id = obj.id
                tree = call_tree[obj_id]
                if tree is not False:  # False == subtree already finished
                    if not isinstance(obj, GlobalCatalog):
                        if isinstance(obj, ObjectManager):
                            # Children first ...
                            for ob in obj.objectValues():
                                for kid in find_kids(ob, tree):
                                    yield kid
                            if isinstance(obj, ZenModelRM):
                                # ... then containment relationships ...
                                for rel in obj.getRelationships():
                                    if isinstance(rel, ToManyContRelationship):
                                        for kid in find_kids(rel, tree):
                                            yield kid
                                # ... then the object itself.
                                yield obj
                        elif isinstance(obj, ToManyContRelationship):
                            for ob in obj.objectValuesGen():
                                for kid in find_kids(ob, tree):
                                    yield kid
                    # invalidation allows object to be garbage collected
                    # NOTE(review): original indentation was lost; the
                    # mark-done below is assumed to apply to every visited
                    # node, including GlobalCatalog -- verify against VCS.
                    inv = getattr(obj, '_p_invalidate', None)
                    if inv is not None:
                        inv()
                    call_tree[obj_id] = False
            except (AttributeError, ClientDisconnected, DisconnectedError):
                _reconnect(zc)
                continue
            break

    # Load a saved call tree when resuming; fall back to a fresh one on any
    # unpickling or file error.
    try:
        if resume:
            with open(CALL_TREE_DUMP_FILE, 'rb') as f:
                call_tree = pickle.load(f)
        else:
            call_tree = RecursiveDefaultDict()
    except (pickle.PickleError, IOError):
        call_tree = RecursiveDefaultDict()

    try:
        with catalog_caching():
            tick = time.time()
            for obj in recurse(zport, call_tree):
                put_or_die(outbox, obj.getPrimaryPath())
                # Periodically abort the transaction so ZODB can release
                # its object cache.
                if (counter.increment() % 100 == 0) and (time.time() - tick) > 5.0:
                    transaction.abort() # allow garbage collection
                    tick = time.time()
    except KeyboardInterrupt:
        # Interrupted: persist the partial call tree so a later run with
        # resume=True can pick up where this one left off.
        if call_tree:
            with open(CALL_TREE_DUMP_FILE, 'wb') as f:
                pickle.dump(call_tree, f)
        else:
            quietly_remove(CALL_TREE_DUMP_FILE)
    finally:
        transaction.abort()
def convert_into_document(worker_id, inbox, outbox, buffer_size, permissions_only, print_progress=None):
    """Indexing worker: turn primary paths from ``inbox`` into documents.

    For each path read from ``inbox`` the object is loaded from the ZODB and
    pushed through the global catalog's indexes; the captured index values
    are emitted on ``outbox`` as a (documentId, uid, values, metadata) tuple.
    ``(None, path, None, None)`` on ``outbox`` requests an uncatalog of that
    path, and a ``None`` item on ``inbox`` ends the loop.

    Catalog index classes are monkey-patched process-wide so indexing writes
    into the local scratch lists below instead of the real index structures.

    :param worker_id: identifier of this worker process
    :param inbox: queue of primary paths; None terminates
    :param outbox: queue receiving document tuples / uncatalog markers
    :param buffer_size: not referenced in this body -- TODO confirm
    :param permissions_only: if True, only allowedRolesAndUsers is indexed
    :param print_progress: optional callable for ProgressCounter
    """
    ignore_interruptions()
    drop_all_arguments()
    zc = ZenCatalog()
    dmd = zc.dmd
    zport = dmd.getPhysicalRoot().zport
    catalog = dmd.global_catalog
    # Scratch buffers populated by the patched methods; cleared per object.
    vals = []          # (index id, value, unindex value) triples
    documentIds = []   # document ids recorded by the patched index_object
    uids = []          # uids recorded by the patched catalogObject
    counter = ProgressCounter(print_progress=print_progress)

    # Apply monkey patches ...
    def index_object(self, documentId, obj, threshold=None):
        # ManagableIndex.index_object replacement: capture the evaluated
        # value instead of storing it into the index.
        val = self._evaluate(obj)
        cuv = self._val2UnindexVal
        if val is not None and cuv is not None:
            unindexVal = cuv(val)
        else:
            unindexVal = val
        documentIds.append(documentId)
        if val is not None:
            vals.append((self.id, val, unindexVal))
            return 1
        return 0
    ManagableIndex.index_object = index_object

    def mpi_index_object(self, docId, obj, threshold=None):
        # MultiPathIndex.index_object replacement: capture the object's
        # path(s) instead of indexing them.
        f = getattr(obj, self.id, None)
        if f is not None:
            if safe_callable(f):
                try:
                    paths = f()
                except AttributeError:
                    return 0
            else:
                paths = f
        else:
            try:
                paths = obj.getPhysicalPath()
            except AttributeError:
                return 0
        if paths:
            paths = _recursivePathSplit(paths)
            if not _isSequenceOfSequences(paths):
                paths = [paths]
            vals.append((self.id, paths, None))
            return 1
        return 0
    MultiPathIndex.index_object = mpi_index_object

    orig_catalogObject = Catalog.catalogObject
    def catalogObject(self, object, uid, threshold=None, idxs=None, update_metadata=1):
        # Thin wrapper that records the uid, then delegates to the original.
        uids.append(uid)
        return orig_catalogObject(self, object, uid, threshold, idxs, update_metadata)
    Catalog.catalogObject = catalogObject

    def convertToDocument(primary_path):
        # Index a single object; the loop retries after a DB reconnect.
        while True:
            try:
                try:
                    obj = dmd.unrestrictedTraverse(primary_path)
                except (AttributeError, ClientDisconnected, DisconnectedError):
                    # Re-raised so the outer except can reconnect and retry.
                    raise
                except KeyError:
                    log.debug("Could not load object: %s", primary_path)
                    put_or_die(outbox, (None, primary_path, None, None)) # uncatalog
                    counter.increment()
                    # NOTE(review): 'continue' re-enters the while loop and
                    # re-attempts the traversal; a persistent KeyError would
                    # retry forever -- confirm intended behavior.
                    continue
                except Exception:
                    log.warn("Unexpected exception loading object %s. Skipping!",
                             primary_path, exc_info=True)
                    counter.increment()
                    # NOTE(review): same retry concern as above.
                    continue
                if obj is None:
                    log.debug("%s does not exist", primary_path)
                    put_or_die(outbox, (None, primary_path, None, None)) # uncatalog
                    counter.increment()
                    continue
                if permissions_only:
                    catalog.catalog_object(obj, update_metadata=False,
                                           idxs=("allowedRolesAndUsers",))
                else:
                    # We intentionally don't do legacy indexing:
                    # if hasattr(obj, 'index_object'): obj.index_object()
                    catalog.catalog_object(obj)
                if documentIds:
                    # Emit one document tuple; slice-copy the shared buffers
                    # because the 'finally' below clears them in place.
                    uid = uids.pop()
                    documentId = documentIds[0]
                    metadata = catalog._catalog.data.get(documentId, {})
                    put_or_die(outbox, (documentId, uid[:], vals[:], metadata))
                counter.increment()
            except (AttributeError, ClientDisconnected, DisconnectedError):
                reconnect(zc)
                continue
            finally:
                # clear lists
                vals[:] = []
                uids[:] = []
                documentIds[:] = []
                # Invalidation allows object to be garbage collected
                # NOTE(review): 'obj' may be unbound when traversal raised;
                # the NameError is swallowed by the caller's broad except.
                inv = getattr(obj, '_p_invalidate', None)
                if inv is not None:
                    inv()
            break

    # Process my inbox ...
    with catalog_caching():
        tick = time.time()
        while True:
            try:
                # Periodic transaction abort lets ZODB drop cached objects.
                if (counter.count % 100 == 0) and (time.time() - tick > 5.0):
                    transaction.abort() # Allow garbage collection
                    tick = time.time()
                primary_path = inbox.get_nowait()
            except Empty:
                check_for_dead_parent()
                time.sleep(0.1)
                continue
            if primary_path is None:
                break # End of inbox. We're done here.
            try:
                convertToDocument(primary_path)
            except Exception:
                log.info("Error indexing object %s. Skipping.", primary_path,
                         exc_info=log.isEnabledFor(logging.DEBUG))
def source_from_zport(control, outbox, resume, print_progress=None):
    """Producer: enumerate every object under zport and queue its path.

    Depth-first traversal of the object hierarchy; each visited object's
    primary path is put on ``outbox`` for the worker processes.  The walk is
    checkpointed in a call tree (finished subtrees marked False) that is
    pickled to CALL_TREE_DUMP_FILE on KeyboardInterrupt, enabling resume.

    :param control: handle polled by check_for_interrupt() for stop requests
    :param outbox: queue receiving primary-path tuples
    :param resume: if True, reload a previously pickled call tree
    :param print_progress: optional callable handed to ProgressCounter
    """
    ignore_interruptions()
    drop_all_arguments()
    zc = ZenCatalog()
    zport = zc.dmd.getPhysicalRoot().zport
    counter = ProgressCounter(print_progress=print_progress)

    def find_kids(obj, call_tree):
        # Filter out None children from recurse(), logging once per parent
        # if any were seen.
        nones_counted = 0
        for kid in recurse(obj, call_tree):
            if kid is None:
                nones_counted += 1
            else:
                yield kid
        if nones_counted:
            try:
                description = obj.getPrimaryPath()
            except Exception:
                description = repr(obj)
            log.error("Object %s has a None child!" % description)

    def recurse(obj, call_tree=None):
        # Generator over the subtree rooted at obj.  On connection errors
        # the node is retried after _reconnect(); completed subtrees are
        # marked False in call_tree so resumed runs skip them.
        if call_tree is None:
            call_tree = RecursiveDefaultDict()
        while True:
            check_for_dead_parent()
            check_for_interrupt(control)
            try:
                obj_id = obj.id
                tree = call_tree[obj_id]
                if tree is not False:  # False == already done
                    if not isinstance(obj, GlobalCatalog):
                        if isinstance(obj, ObjectManager):
                            # Contained objects first ...
                            for ob in obj.objectValues():
                                for kid in find_kids(ob, tree):
                                    yield kid
                            if isinstance(obj, ZenModelRM):
                                # ... then containment relationships ...
                                for rel in obj.getRelationships():
                                    if isinstance(rel, ToManyContRelationship):
                                        for kid in find_kids(rel, tree):
                                            yield kid
                                # ... finally the object itself.
                                yield obj
                        elif isinstance(obj, ToManyContRelationship):
                            for ob in obj.objectValuesGen():
                                for kid in find_kids(ob, tree):
                                    yield kid
                    # invalidation allows object to be garbage collected
                    # NOTE(review): indentation lost in this copy; mark-done
                    # assumed to run for every visited node -- verify.
                    inv = getattr(obj, '_p_invalidate', None)
                    if inv is not None:
                        inv()
                    call_tree[obj_id] = False
            except (AttributeError, ClientDisconnected, DisconnectedError):
                _reconnect(zc)
                continue
            break

    # Resume from a pickled call tree if asked; any failure starts fresh.
    try:
        if resume:
            with open(CALL_TREE_DUMP_FILE, 'rb') as f:
                call_tree = pickle.load(f)
        else:
            call_tree = RecursiveDefaultDict()
    except (pickle.PickleError, IOError):
        call_tree = RecursiveDefaultDict()

    try:
        with catalog_caching():
            tick = time.time()
            for obj in recurse(zport, call_tree):
                put_or_die(outbox, obj.getPrimaryPath())
                # Periodic abort lets ZODB shed its object cache.
                if (counter.increment() % 100 == 0) and (time.time() - tick) > 5.0:
                    transaction.abort() # allow garbage collection
                    tick = time.time()
    except KeyboardInterrupt:
        # Save progress for a future resume, or remove a stale dump when
        # there is nothing worth saving.
        if call_tree:
            with open(CALL_TREE_DUMP_FILE, 'wb') as f:
                pickle.dump(call_tree, f)
        else:
            quietly_remove(CALL_TREE_DUMP_FILE)
    finally:
        transaction.abort()