예제 #1
0
def reindex_catalog(zport,
                    permissionsOnly=False,
                    printProgress=True,
                    commit=True):
    """Recatalog every object found in zport's global catalog.

    Brains whose objects can no longer be loaded are removed from the
    catalog.  Every PROGRESS_CHUNK_SIZE objects, progress is reported
    (dots on stdout or a log line) and, when *commit* is true, the
    transaction is committed.

    :param zport: portal object providing ``global_catalog``.
    :param permissionsOnly: if True, refresh only the
        ``allowedRolesAndUsers`` index and leave metadata untouched.
    :param printProgress: write progress dots to stdout instead of
        emitting log lines.
    :param commit: commit after each progress chunk and once at the end.
    """
    globalCat = zport.global_catalog
    with catalog_caching():
        msg = 'permissions' if permissionsOnly else 'objects'
        # Lazy %-args: the logging module formats only if the record
        # is actually emitted.
        log.info('reindexing %s in catalog', msg)
        i = 0
        # Bind the bound method once; it is called for every brain.
        catObj = globalCat.catalog_object
        for brain in globalCat():
            log.debug('indexing %s', brain.getPath())
            try:
                obj = brain.getObject()
            except KeyError:
                # Stale brain: the object is gone, so drop its entry.
                log.debug("Could not load object. Deleting: %s",
                          brain.getPath())
                globalCat.uncatalog_object(brain.getPath())
                continue
            except Exception:
                # Logger.warn is deprecated; warning() is the
                # supported spelling.
                log.warning("Unexpected exception loading object %s. Skipping!",
                            brain.getPath(),
                            exc_info=True)
                continue
            if obj is not None:
                # Empty kwargs means catalog_object updates all indexes.
                kwargs = {}
                if permissionsOnly:
                    kwargs = {
                        'update_metadata': False,
                        'idxs': ("allowedRolesAndUsers", )
                    }
                elif hasattr(obj, 'index_object'):
                    # Legacy per-object indexing hook.
                    obj.index_object()

                catObj(obj, **kwargs)
                log.debug('Catalogued object %s', obj.absolute_url_path())
            else:
                log.debug('%s does not exist', brain.getPath())
                globalCat.uncatalog_object(brain.getPath())
            i += 1
            if not i % PROGRESS_CHUNK_SIZE:
                if printProgress:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                else:
                    log.info('Catalogued %s objects', i)
                if commit:
                    transaction.commit()
        if printProgress:
            sys.stdout.write('\n')
            sys.stdout.flush()
        if commit:
            transaction.commit()
예제 #2
0
def reindex_catalog(zport, permissionsOnly=False, printProgress=True, commit=True):
    """Walk zport's global catalog and recatalog every object it holds.

    Brains whose objects can no longer be loaded are uncataloged.
    Every PROGRESS_CHUNK_SIZE objects, progress is reported and (when
    *commit* is true) the transaction is committed.

    :param zport: portal object providing ``global_catalog``.
    :param permissionsOnly: refresh only the ``allowedRolesAndUsers``
        index, leaving metadata untouched.
    :param printProgress: emit dots on stdout instead of log lines.
    :param commit: commit after each chunk and once at the end.
    """
    globalCat = zport.global_catalog
    with catalog_caching():
        msg = 'objects'
        if permissionsOnly:
            msg = 'permissions'
        log.info('reindexing %s in catalog' % msg)
        i = 0
        # Bind the bound method once; it is called for every brain below.
        catObj = globalCat.catalog_object
        for brain in globalCat():
            log.debug('indexing %s' % brain.getPath())
            try:
                obj = brain.getObject()
            except KeyError:
                # Stale brain: the object is gone, so drop its entry.
                log.debug("Could not load object. Deleting: %s", brain.getPath())
                globalCat.uncatalog_object(brain.getPath())
                continue
            except Exception:
                log.warn("Unexpected exception loading object %s. Skipping!", brain.getPath(), exc_info=True)
                continue
            if obj is not None:
                # Empty kwargs means catalog_object updates all indexes.
                kwargs = {}
                if permissionsOnly:
                    kwargs = {'update_metadata': False,
                              'idxs': ("allowedRolesAndUsers",)}
                elif hasattr(obj, 'index_object'):
                    # Legacy per-object indexing hook.
                    obj.index_object()

                catObj(obj, **kwargs)
                log.debug('Catalogued object %s' % obj.absolute_url_path())
            else:
                log.debug('%s does not exists' % brain.getPath())
                globalCat.uncatalog_object(brain.getPath())
            i += 1
            if not i % PROGRESS_CHUNK_SIZE:
                if printProgress:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                else:
                    log.info('Catalogued %s objects' % i)
                if commit:
                    transaction.commit()
        if printProgress:
            sys.stdout.write('\n')
            sys.stdout.flush()
        if commit:
            transaction.commit()
예제 #3
0
    def run(self):
        """Run catalog creation or reindexing under the Twisted reactor.

        Exactly one of --createcatalog / --reindex must be selected;
        otherwise the option parser reports an error.  The reactor is
        stopped once the chosen operation's deferred fires, whether it
        succeeds or fails.
        """
        # Guard clause first: parser.error reports and exits.
        if not (self.options.createcatalog or self.options.reindex):
            self.parser.error("Must use one of --createcatalog, --reindex")

        def _halt(ignored):
            # addBoth callback: stop the reactor on success or failure.
            reactor.stop()

        def _start():
            zport = self.dmd.getPhysicalRoot().zport
            if self.options.createcatalog:
                d = self._createCatalog(zport)
            elif self.options.reindex:
                d = self._reindex(zport)
            d.addBoth(_halt)

        reactor.callWhenRunning(_start)
        with catalog_caching():
            reactor.run()
예제 #4
0
    def run(self):
        """Run either catalog creation or reindexing under the Twisted
        reactor, stopping the reactor when the work's deferred fires
        (success or failure).
        """

        def stop(ignored):
            # addBoth callback: halt the reactor on success or failure.
            reactor.stop()

        def main():
            zport = self.dmd.getPhysicalRoot().zport
            if self.options.createcatalog:
                d = self._createCatalog(zport)
            elif self.options.reindex:
                d = self._reindex(zport)
            d.addBoth(stop)

        # One of the two modes must be selected; parser.error exits.
        if not self.options.createcatalog and not self.options.reindex:
            self.parser.error("Must use one of --createcatalog, --reindex")
        reactor.callWhenRunning(main)
        with catalog_caching():
            reactor.run()
예제 #5
0
def convert_into_document(worker_id,
                          inbox,
                          outbox,
                          buffer_size,
                          permissions_only,
                          print_progress=None):
    """Worker loop: pull primary paths from *inbox*, load each object
    and emit an index "document" tuple on *outbox*.

    Catalog index classes are monkey patched so that indexing an object
    captures the computed values into local buffers instead of writing
    the catalog's own data structures; each converted object is then
    shipped downstream as ``(documentId, uid, vals, metadata)``.
    ``(None, path, None, None)`` is emitted to request uncataloging of
    objects that can no longer be loaded.  A ``None`` inbox entry ends
    the loop.

    :param worker_id: identity of this worker.  NOTE(review): unused in
        this body -- confirm callers still need it.
    :param inbox: queue of primary paths to process.
    :param outbox: queue receiving document tuples.
    :param buffer_size: NOTE(review): unused in this body -- verify.
    :param permissions_only: reindex only ``allowedRolesAndUsers``.
    :param print_progress: optional callback passed to ProgressCounter.
    """
    ignore_interruptions()
    drop_all_arguments()
    zc = ZenCatalog()
    dmd = zc.dmd
    zport = dmd.getPhysicalRoot().zport
    catalog = dmd.global_catalog
    # Capture buffers filled by the patched index methods below; they
    # are drained (and cleared) once per converted object.
    vals = []
    documentIds = []
    uids = []
    counter = ProgressCounter(print_progress=print_progress)

    # Apply monkey patches ...
    def index_object(self, documentId, obj, threshold=None):
        # Record the evaluated value rather than updating the index.
        val = self._evaluate(obj)

        cuv = self._val2UnindexVal
        if val is not None and cuv is not None: unindexVal = cuv(val)
        else: unindexVal = val

        documentIds.append(documentId)

        if val is not None:
            vals.append((self.id, val, unindexVal))
            return 1
        return 0

    ManagableIndex.index_object = index_object

    def mpi_index_object(self, docId, obj, threshold=None):
        # Path-index variant: capture the object's path(s) instead of
        # mutating the MultiPathIndex.
        f = getattr(obj, self.id, None)
        if f is not None:
            if safe_callable(f):
                try:
                    paths = f()
                except AttributeError:
                    return 0
            else:
                paths = f
        else:
            try:
                paths = obj.getPhysicalPath()
            except AttributeError:
                return 0
        if paths:
            paths = _recursivePathSplit(paths)
            if not _isSequenceOfSequences(paths):
                paths = [paths]
            vals.append((self.id, paths, None))
            return 1
        return 0

    MultiPathIndex.index_object = mpi_index_object

    orig_catalogObject = Catalog.catalogObject

    def catalogObject(self,
                      object,
                      uid,
                      threshold=None,
                      idxs=None,
                      update_metadata=1):
        # Remember the uid of every cataloged object, then delegate to
        # the original implementation.
        uids.append(uid)
        return orig_catalogObject(self, object, uid, threshold, idxs,
                                  update_metadata)

    Catalog.catalogObject = catalogObject

    def convertToDocument(primary_path):
        # Convert one object; retried from scratch on connection loss.
        while True:
            try:
                try:
                    obj = dmd.unrestrictedTraverse(primary_path)
                except (AttributeError, ClientDisconnected, DisconnectedError):
                    # Handled by the outer clause: reconnect and retry.
                    raise
                except KeyError:
                    # NOTE(review): 'continue' re-enters the retry loop,
                    # but the finally clause below then reads the unbound
                    # 'obj' and raises, which the caller logs -- confirm
                    # whether 'break' was intended here.
                    log.debug("Could not load object: %s", primary_path)
                    put_or_die(outbox,
                               (None, primary_path, None, None))  # uncatalog
                    counter.increment()
                    continue
                except Exception:
                    log.warn(
                        "Unexpected exception loading object %s. Skipping!",
                        primary_path,
                        exc_info=True)
                    counter.increment()
                    continue
                if obj is None:
                    log.debug("%s does not exist", primary_path)
                    put_or_die(outbox,
                               (None, primary_path, None, None))  # uncatalog
                    counter.increment()
                    continue
                if permissions_only:
                    catalog.catalog_object(obj,
                                           update_metadata=False,
                                           idxs=("allowedRolesAndUsers", ))
                else:
                    # We intentionally don't do legacy indexing:
                    # if hasattr(obj, 'index_object'): obj.index_object()
                    catalog.catalog_object(obj)
                if documentIds:
                    uid = uids.pop()
                    documentId = documentIds[0]
                    metadata = catalog._catalog.data.get(documentId, {})
                    # Ship copies so clearing the buffers below is safe.
                    put_or_die(outbox, (documentId, uid[:], vals[:], metadata))
                    counter.increment()
            except (AttributeError, ClientDisconnected, DisconnectedError):
                reconnect(zc)
                continue
            finally:
                # clear lists
                vals[:] = []
                uids[:] = []
                documentIds[:] = []
                # Invalidation allows object to be garbage collected
                inv = getattr(obj, '_p_invalidate', None)
                if inv is not None: inv()
            break

    # Process my inbox ...
    with catalog_caching():
        tick = time.time()
        while True:
            try:
                # Periodic abort lets ZODB release cached objects.
                if (counter.count % 100 == 0) and (time.time() - tick > 5.0):
                    transaction.abort()  # Allow garbage collection
                    tick = time.time()
                primary_path = inbox.get_nowait()
            except Empty:
                check_for_dead_parent()
                time.sleep(0.1)
                continue
            if primary_path is None:
                break  # End of inbox. We're done here.
            try:
                convertToDocument(primary_path)
            except Exception:
                log.info("Error indexing object %s. Skipping.",
                         primary_path,
                         exc_info=log.isEnabledFor(logging.DEBUG))
예제 #6
0
def source_from_zport(control, outbox, resume, print_progress=None):
    """Producer: walk the zport object tree and feed each object's
    primary path to *outbox* for the indexing workers.

    Traversal progress is tracked in a RecursiveDefaultDict "call
    tree"; on KeyboardInterrupt the tree is pickled to
    CALL_TREE_DUMP_FILE so a later run with ``resume=True`` can
    continue where this one stopped.

    :param control: handle polled by check_for_interrupt.
    :param outbox: queue receiving primary-path tuples.
    :param resume: load a previously dumped call tree if available.
    :param print_progress: optional callback passed to ProgressCounter.
    """
    ignore_interruptions()
    drop_all_arguments()
    zc = ZenCatalog()
    zport = zc.dmd.getPhysicalRoot().zport
    counter = ProgressCounter(print_progress=print_progress)

    def find_kids(obj, call_tree):
        # Yield obj's descendants; None children are counted and
        # reported instead of being yielded.
        nones_counted = 0
        for kid in recurse(obj, call_tree):
            if kid is None:
                nones_counted += 1
            else:
                yield kid
        if nones_counted:
            try:
                description = obj.getPrimaryPath()
            except Exception:
                description = repr(obj)
            log.error("Object %s has a None child!" % description)

    def recurse(obj, call_tree=None):
        # Depth-first generator over the object tree.  call_tree marks
        # finished subtrees with False so a resumed run can skip them.
        if call_tree is None:
            call_tree = RecursiveDefaultDict()
        while True:
            check_for_dead_parent()
            check_for_interrupt(control)
            try:
                obj_id = obj.id
                tree = call_tree[obj_id]
                if tree is not False:
                    # Never descend into the catalog itself.
                    if not isinstance(obj, GlobalCatalog):
                        if isinstance(obj, ObjectManager):
                            for ob in obj.objectValues():
                                for kid in find_kids(ob, tree):
                                    yield kid
                            if isinstance(obj, ZenModelRM):
                                # Containment relationships hold more kids.
                                for rel in obj.getRelationships():
                                    if isinstance(rel, ToManyContRelationship):
                                        for kid in find_kids(rel, tree):
                                            yield kid
                                yield obj
                        elif isinstance(obj, ToManyContRelationship):
                            for ob in obj.objectValuesGen():
                                for kid in find_kids(ob, tree):
                                    yield kid
                    # invalidation allows object to be garbage collected
                    inv = getattr(obj, '_p_invalidate', None)
                    if inv is not None: inv()
                    call_tree[obj_id] = False
            except (AttributeError, ClientDisconnected, DisconnectedError):
                # Database hiccup: reconnect and retry this object.
                _reconnect(zc)
                continue
            break

    # Load a saved call tree when resuming; fall back to a fresh one.
    try:
        if resume:
            with open(CALL_TREE_DUMP_FILE, 'rb') as f:
                call_tree = pickle.load(f)
        else:
            call_tree = RecursiveDefaultDict()
    except (pickle.PickleError, IOError):
        call_tree = RecursiveDefaultDict()
    try:
        with catalog_caching():
            tick = time.time()
            for obj in recurse(zport, call_tree):
                put_or_die(outbox, obj.getPrimaryPath())
                # Periodic abort lets ZODB release cached objects.
                if (counter.increment() % 100
                        == 0) and (time.time() - tick) > 5.0:
                    transaction.abort()  # allow garbage collection
                    tick = time.time()
    except KeyboardInterrupt:
        # Save progress so a resume run can pick up from here.
        if call_tree:
            with open(CALL_TREE_DUMP_FILE, 'wb') as f:
                pickle.dump(call_tree, f)
    else:
        quietly_remove(CALL_TREE_DUMP_FILE)
    finally:
        transaction.abort()
예제 #7
0
def convert_into_document(worker_id, inbox, outbox, buffer_size, permissions_only, print_progress=None):
    """Worker loop: pull primary paths from *inbox*, load each object
    and emit an index "document" tuple on *outbox*.

    Catalog index classes are monkey patched so that indexing an object
    captures the computed values into local buffers instead of writing
    the catalog's own data structures; each converted object is then
    shipped downstream as ``(documentId, uid, vals, metadata)``.
    ``(None, path, None, None)`` is emitted to request uncataloging of
    objects that can no longer be loaded.  A ``None`` inbox entry ends
    the loop.

    :param worker_id: identity of this worker.  NOTE(review): unused in
        this body -- confirm callers still need it.
    :param inbox: queue of primary paths to process.
    :param outbox: queue receiving document tuples.
    :param buffer_size: NOTE(review): unused in this body -- verify.
    :param permissions_only: reindex only ``allowedRolesAndUsers``.
    :param print_progress: optional callback passed to ProgressCounter.
    """
    ignore_interruptions()
    drop_all_arguments()
    zc          = ZenCatalog()
    dmd         = zc.dmd
    zport       = dmd.getPhysicalRoot().zport
    catalog     = dmd.global_catalog
    # Capture buffers filled by the patched index methods below; they
    # are drained (and cleared) once per converted object.
    vals        = []
    documentIds = []
    uids        = []
    counter     = ProgressCounter(print_progress=print_progress)

    # Apply monkey patches ...
    def index_object(self, documentId, obj, threshold=None):
        # Record the evaluated value rather than updating the index.
        val= self._evaluate(obj)

        cuv = self._val2UnindexVal
        if val is not None and cuv is not None: unindexVal = cuv(val)
        else: unindexVal = val

        documentIds.append(documentId)

        if val is not None:
            vals.append((self.id, val, unindexVal))
            return 1
        return 0
    ManagableIndex.index_object = index_object

    def mpi_index_object(self, docId, obj, threshold=None):
        # Path-index variant: capture the object's path(s) instead of
        # mutating the MultiPathIndex.
        f = getattr(obj, self.id, None)
        if f is not None:
            if safe_callable(f):
                try:
                    paths = f()
                except AttributeError:
                    return 0
            else:
                paths = f
        else:
            try:
                paths = obj.getPhysicalPath()
            except AttributeError:
                return 0
        if paths:
            paths = _recursivePathSplit(paths)
            if not _isSequenceOfSequences(paths):
                paths = [paths]
            vals.append((self.id, paths, None))
            return 1
        return 0
    MultiPathIndex.index_object = mpi_index_object


    orig_catalogObject = Catalog.catalogObject
    def catalogObject(self, object, uid, threshold=None,
                      idxs=None, update_metadata=1):
        # Remember the uid of every cataloged object, then delegate to
        # the original implementation.
        uids.append(uid)
        return orig_catalogObject(self, object, uid, threshold,
                                  idxs, update_metadata)
    Catalog.catalogObject = catalogObject

    def convertToDocument(primary_path):
        # Convert one object; retried from scratch on connection loss.
        while True:
            try:
                try:
                    obj = dmd.unrestrictedTraverse(primary_path)
                except (AttributeError, ClientDisconnected, DisconnectedError):
                    # Handled by the outer clause: reconnect and retry.
                    raise
                except KeyError:
                    # NOTE(review): 'continue' re-enters the retry loop,
                    # but the finally clause below then reads the unbound
                    # 'obj' and raises, which the caller logs -- confirm
                    # whether 'break' was intended here.
                    log.debug("Could not load object: %s", primary_path)
                    put_or_die(outbox, (None, primary_path, None, None)) # uncatalog
                    counter.increment()
                    continue
                except Exception:
                    log.warn("Unexpected exception loading object %s. Skipping!", primary_path, exc_info=True)
                    counter.increment()
                    continue
                if obj is None:
                    log.debug("%s does not exist", primary_path)
                    put_or_die(outbox, (None, primary_path, None, None)) # uncatalog
                    counter.increment()
                    continue
                if permissions_only:
                    catalog.catalog_object(obj, update_metadata=False,
                                                idxs=("allowedRolesAndUsers",))
                else:
                    # We intentionally don't do legacy indexing:
                    # if hasattr(obj, 'index_object'): obj.index_object()
                    catalog.catalog_object(obj)
                if documentIds:
                    uid = uids.pop()
                    documentId = documentIds[0]
                    metadata = catalog._catalog.data.get(documentId, {})
                    # Ship copies so clearing the buffers below is safe.
                    put_or_die(outbox, (documentId, uid[:], vals[:], metadata))
                    counter.increment()
            except (AttributeError, ClientDisconnected, DisconnectedError):
                reconnect(zc)
                continue
            finally:
                # clear lists
                vals[:] = []
                uids[:] = []
                documentIds[:] = []
                # Invalidation allows object to be garbage collected
                inv = getattr(obj, '_p_invalidate', None)
                if inv is not None: inv()
            break

    # Process my inbox ...
    with catalog_caching():
        tick = time.time()
        while True:
            try:
                # Periodic abort lets ZODB release cached objects.
                if (counter.count % 100 == 0) and (time.time() - tick > 5.0):
                  transaction.abort() # Allow garbage collection
                  tick = time.time()
                primary_path = inbox.get_nowait()
            except Empty:
                check_for_dead_parent()
                time.sleep(0.1)
                continue
            if primary_path is None:
                break # End of inbox. We're done here.
            try:
                convertToDocument(primary_path)
            except Exception:
                log.info("Error indexing object %s. Skipping.", primary_path,
                         exc_info = log.isEnabledFor(logging.DEBUG))
예제 #8
0
def source_from_zport(control, outbox, resume, print_progress=None):
    """Producer: walk the zport object tree and feed each object's
    primary path to *outbox* for the indexing workers.

    Traversal progress is tracked in a RecursiveDefaultDict "call
    tree"; on KeyboardInterrupt the tree is pickled to
    CALL_TREE_DUMP_FILE so a later run with ``resume=True`` can
    continue where this one stopped.

    :param control: handle polled by check_for_interrupt.
    :param outbox: queue receiving primary-path tuples.
    :param resume: load a previously dumped call tree if available.
    :param print_progress: optional callback passed to ProgressCounter.
    """
    ignore_interruptions()
    drop_all_arguments()
    zc = ZenCatalog()
    zport = zc.dmd.getPhysicalRoot().zport
    counter = ProgressCounter(print_progress=print_progress)

    def find_kids(obj, call_tree):
        # Yield obj's descendants; None children are counted and
        # reported instead of being yielded.
        nones_counted = 0
        for kid in recurse(obj, call_tree):
            if kid is None:
                nones_counted += 1
            else:
                yield kid
        if nones_counted:
            try:
                description = obj.getPrimaryPath()
            except Exception:
                description = repr(obj)
            log.error("Object %s has a None child!" % description)

    def recurse(obj, call_tree=None):
        # Depth-first generator over the object tree.  call_tree marks
        # finished subtrees with False so a resumed run can skip them.
        if call_tree is None:
            call_tree = RecursiveDefaultDict()
        while True:
            check_for_dead_parent()
            check_for_interrupt(control)
            try:
                obj_id = obj.id
                tree = call_tree[obj_id]
                if tree is not False:
                    # Never descend into the catalog itself.
                    if not isinstance(obj, GlobalCatalog):
                        if isinstance(obj, ObjectManager):
                            for ob in obj.objectValues():
                                for kid in find_kids(ob, tree):
                                    yield kid
                            if isinstance(obj, ZenModelRM):
                                # Containment relationships hold more kids.
                                for rel in obj.getRelationships():
                                    if isinstance(rel, ToManyContRelationship):
                                        for kid in find_kids(rel, tree):
                                            yield kid
                                yield obj
                        elif isinstance(obj, ToManyContRelationship):
                            for ob in obj.objectValuesGen():
                                for kid in find_kids(ob, tree):
                                    yield kid
                    # invalidation allows object to be garbage collected
                    inv = getattr(obj, '_p_invalidate', None)
                    if inv is not None: inv()
                    call_tree[obj_id] = False
            except (AttributeError, ClientDisconnected, DisconnectedError):
                # Database hiccup: reconnect and retry this object.
                _reconnect(zc)
                continue
            break
    # Load a saved call tree when resuming; fall back to a fresh one.
    try:
        if resume:
            with open(CALL_TREE_DUMP_FILE, 'rb') as f:
                call_tree = pickle.load(f)
        else:
            call_tree = RecursiveDefaultDict()
    except (pickle.PickleError, IOError):
        call_tree = RecursiveDefaultDict()
    try:
        with catalog_caching():
            tick = time.time()
            for obj in recurse(zport, call_tree):
                put_or_die(outbox, obj.getPrimaryPath())
                # Periodic abort lets ZODB release cached objects.
                if (counter.increment() % 100 == 0) and (time.time() - tick) > 5.0:
                  transaction.abort() # allow garbage collection
                  tick = time.time()
    except KeyboardInterrupt:
        # Save progress so a resume run can pick up from here.
        if call_tree:
            with open(CALL_TREE_DUMP_FILE, 'wb') as f:
                pickle.dump(call_tree, f)
    else:
        quietly_remove(CALL_TREE_DUMP_FILE)
    finally:
        transaction.abort()