コード例 #1
0
    def reindex(self, batch=1000, skip=0):
        """ find all contentish objects (meaning all objects derived from one
            of the catalog mixin classes) and (re)indexes them """
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log('reindexing solr catalog...\n')
        if skip:
            log('skipping indexing of %d object(s)...\n' % skip)
        real = timer()  # real time
        lap = timer()  # real lap time (for intermediate commits)
        cpu = timer(clock)  # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}  # list to hold data to be updated
        flush = lambda: conn.flush()
        flush = notimeout(flush)

        def checkPoint():
            for boost_values, data in updates.values():
                conn.add(boost_values=boost_values, **data)
            updates.clear()
            msg = 'intermediate commit (%d items processed, ' \
                  'last batch in %s)...\n' % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()

        cpi = checkpointIterator(checkPoint, batch)
        count = 0
        for path, obj in findObjects(self.context):
            if indexable(obj):
                if getOwnIndexMethod(obj, 'indexObject') is not None:
                    log('skipping indexing of %r via private method.\n' % obj)
                    continue
                count += 1
                if count <= skip:
                    continue
                data, missing = proc.getData(obj)
                prepareData(data)
                if not missing:
                    value = data.get(key, None)
                    if value is not None:
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        cpi.next()
                else:
                    log('missing data, skipping indexing of %r.\n' % obj)
        checkPoint()
        conn.commit()
        log('solr index rebuilt.\n')
        msg = 'processed %d items in %s (%s cpu time).'
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)
コード例 #2
0
ファイル: maintenance.py プロジェクト: Jarn/collective.solr
    def reindex(self, batch=1000, skip=0):
        """ find all contentish objects (meaning all objects derived from one
            of the catalog mixin classes) and (re)indexes them """
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log("reindexing solr catalog...\n")
        if skip:
            log("skipping indexing of %d object(s)...\n" % skip)
        real = timer()  # real time
        lap = timer()  # real lap time (for intermediate commits)
        cpu = timer(clock)  # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}  # list to hold data to be updated
        flush = lambda: conn.flush()
        flush = notimeout(flush)

        def checkPoint():
            for boost_values, data in updates.values():
                conn.add(boost_values=boost_values, **data)
            updates.clear()
            msg = "intermediate commit (%d items processed, " "last batch in %s)...\n" % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()

        cpi = checkpointIterator(checkPoint, batch)
        count = 0
        for path, obj in findObjects(self.context):
            if indexable(obj):
                if getOwnIndexMethod(obj, "indexObject") is not None:
                    log("skipping indexing of %r via private method.\n" % obj)
                    continue
                count += 1
                if count <= skip:
                    continue
                data, missing = proc.getData(obj)
                prepareData(data)
                if not missing:
                    value = data.get(key, None)
                    if value is not None:
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        cpi.next()
                else:
                    log("missing data, skipping indexing of %r.\n" % obj)
        checkPoint()
        conn.commit()
        log("solr index rebuilt.\n")
        msg = "processed %d items in %s (%s cpu time)."
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)
コード例 #3
0
    def cleanup(self, batch=1000):
        """ remove entries from solr that don't have a corresponding Zope
            object or have a different UID than the real object"""
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        log = self.mklog(use_std_log=True)
        log('cleaning up solr index...\n')
        key = manager.getSchema().uniqueKey

        start = 0
        resp = SolrResponse(conn.search(q='*:*', rows=batch, start=start))
        res = resp.results()
        log('%s items in solr catalog\n' % resp.response.numFound)
        deleted = 0
        reindexed = 0
        while len(res) > 0:
            for flare in res:
                try:
                    ob = PloneFlare(flare).getObject()
                except Exception as err:
                    log('Error getting object, removing: %s (%s)\n' % (
                        flare['path_string'], err))
                    conn.delete(flare[key])
                    deleted += 1
                    continue
                if not IUUIDAware.providedBy(ob):
                    log('Object %s of type %s does not support uuids, skipping.\n' %
                        ('/'.join(ob.getPhysicalPath()), ob.meta_type))
                    continue
                uuid = IUUID(ob)
                if uuid != flare[key]:
                    log('indexed under wrong UID, removing: %s\n' %
                        flare['path_string'])
                    conn.delete(flare[key])
                    deleted += 1
                    realob_res = SolrResponse(conn.search(q='%s:%s' %
                                              (key, uuid))).results()
                    if len(realob_res) == 0:
                        log('no sane entry for last object, reindexing\n')
                        data, missing = proc.getData(ob)
                        prepareData(data)
                        if not missing:
                            boost = boost_values(ob, data)
                            conn.add(boost_values=boost, **data)
                            reindexed += 1
                        else:
                            log('  missing data, cannot index.\n')
            log('handled batch of %d items, commiting\n' % len(res))
            conn.commit()
            start += batch
            resp = SolrResponse(conn.search(q='*:*', rows=batch, start=start))
            res = resp.results()
        msg = 'solr cleanup finished, %s item(s) removed, %s item(s) reindexed\n' % (deleted, reindexed)
        log(msg)
        logger.info(msg)
コード例 #4
0
    def reindex(self, batch=1000, skip=0, limit=0):
        """ find all contentish objects (meaning all objects derived from one
            of the catalog mixin classes) and (re)indexes them """
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log('reindexing solr catalog...\n')
        if skip:
            log('skipping indexing of %d object(s)...\n' % skip)
        if limit:
            log('limiting indexing to %d object(s)...\n' % limit)
        real = timer()          # real time
        lap = timer()           # real lap time (for intermediate commits)
        cpu = timer(clock)      # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}            # list to hold data to be updated
        flush = lambda: conn.flush()
        flush = notimeout(flush)

        def checkPoint():
            for boost_values, data in updates.values():
                adder = data.pop('_solr_adder')
                adder(conn, boost_values=boost_values, **data)
            updates.clear()
            msg = 'intermediate commit (%d items processed, ' \
                  'last batch in %s)...\n' % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()
        cpi = checkpointIterator(checkPoint, batch)
        count = 0
        for path, obj in findObjects(self.context):
            if ICheckIndexable(obj)():
                count += 1
                if count <= skip:
                    continue
                data, missing = proc.getData(obj)
                prepareData(data)
                if not missing:
                    value = data.get(key, None)
                    if value is not None:
                        log('indexing %r\n' % obj)
                        pt = data.get('portal_type', 'default')
                        adder = queryAdapter(obj, ISolrAddHandler, name=pt)
                        if adder is None:
                            adder = DefaultAdder(obj)
                        data['_solr_adder'] = adder
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        cpi.next()
                else:
                    log('missing data, skipping indexing of %r.\n' % obj)
                if limit and count >= (skip + limit):
                    break
        checkPoint()
        conn.commit()
        log('solr index rebuilt.\n')
        msg = 'processed %d items in %s (%s cpu time).'
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)
コード例 #5
0
    def cleanup(self, batch=1000):
        """remove entries from solr that don't have a corresponding Zope
        object or have a different UID than the real object"""
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        log = self.mklog(use_std_log=True)
        log("cleaning up solr index...\n")
        key = manager.getSchema().uniqueKey

        start = 0
        resp = SolrResponse(conn.search(q="*:*", rows=batch, start=start))
        res = resp.results()
        log("%s items in solr catalog\n" % resp.response.numFound)
        deleted = 0
        reindexed = 0
        while len(res) > 0:
            for flare in res:
                try:
                    ob = PloneFlare(flare).getObject()
                except Exception as err:
                    log("Error getting object, removing: %s (%s)\n" %
                        (flare["path_string"], err))
                    conn.delete(flare[key])
                    deleted += 1
                    continue
                if ob is None:
                    log("Object not found, removing: %s\n" %
                        (flare["path_string"]))
                    conn.delete(flare[key])
                    deleted += 1
                    continue
                if not IUUIDAware.providedBy(ob):
                    no_skipping_msg = ("Object %s of type %s does not " +
                                       "support uuids, skipping.\n")
                    log(no_skipping_msg %
                        ("/".join(ob.getPhysicalPath()), ob.meta_type))
                    continue
                uuid = IUUID(ob)
                if uuid != flare[key]:
                    log("indexed under wrong UID, removing: %s\n" %
                        flare["path_string"])
                    conn.delete(flare[key])
                    deleted += 1
                    realob_res = SolrResponse(
                        conn.search(q="%s:%s" % (key, uuid))).results()
                    if len(realob_res) == 0:
                        log("no sane entry for last object, reindexing\n")
                        data, missing = proc.getData(ob)
                        prepareData(data)
                        if not missing:
                            boost = boost_values(ob, data)
                            conn.add(boost_values=boost, **data)
                            reindexed += 1
                        else:
                            log("  missing data, cannot index.\n")
            log("handled batch of %d items, committing\n" % len(res))
            conn.commit()
            start += batch
            resp = SolrResponse(conn.search(q="*:*", rows=batch, start=start))
            res = resp.results()
        finished_msg = ("solr cleanup finished, %s item(s) removed, " +
                        "%s item(s) reindexed\n")
        msg = finished_msg % (deleted, reindexed)
        log(msg)
        logger.info(msg)
コード例 #6
0
    def reindex(
        self,
        batch=1000,
        skip=0,
        limit=0,
        ignore_portal_types=None,
        only_portal_types=None,
        idxs=[],
        ignore_exceptions=False,
    ):
        """find all contentish objects (meaning all objects derived from one
        of the catalog mixin classes) and (re)indexes them"""

        if ignore_portal_types and only_portal_types:
            raise ValueError("It is not possible to combine "
                             "ignore_portal_types with only_portal_types")

        atomic = idxs != []
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log("reindexing solr catalog...\n")
        if skip:
            log("skipping indexing of %d object(s)...\n" % skip)
        if limit:
            log("limiting indexing to %d object(s)...\n" % limit)
        real = timer()  # real time
        lap = timer()  # real lap time (for intermediate commits)
        cpu = timer(process_time)  # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}  # list to hold data to be updated

        def flush():
            return conn.commit(soft=True)

        flush = notimeout(flush)

        def checkPoint():
            for my_boost_values, data in updates.values():
                adder = data.pop("_solr_adder")
                try:
                    adder(conn, boost_values=my_boost_values, **data)
                except Exception as e:
                    logger.warning("Error %s @ %s", e, data["path_string"])
                    if not ignore_exceptions:
                        raise
            updates.clear()
            msg = ("intermediate commit (%d items processed, "
                   "last batch in %s)...\n" % (processed, next(lap)))
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()

        cpi = checkpointIterator(checkPoint, batch)
        count = 0

        if atomic:
            log("indexing only {0} \n".format(idxs))

        for path, obj in findObjects(self.context):
            if ICheckIndexable(obj)():
                if getOwnIndexMethod:
                    if getOwnIndexMethod(obj, "indexObject") is not None:
                        log("skipping indexing of %r via private method.\n" %
                            obj)
                        continue

                count += 1
                if count <= skip:
                    continue

                if ignore_portal_types:
                    if obj.portal_type in ignore_portal_types:
                        continue

                if only_portal_types:
                    if obj.portal_type not in only_portal_types:
                        continue

                attributes = None
                if atomic:
                    attributes = idxs

                # For atomic updates to work the uniqueKey must be present
                # in *every* update operation.
                if attributes and key not in attributes:
                    attributes.append(key)
                data, missing = proc.getData(obj, attributes=attributes)
                prepareData(data)

                if not missing or atomic:
                    value = data.get(key, None)
                    if value is not None:
                        log("indexing %r\n" % obj)

                        pt = data.get("portal_type", "default")
                        adder = queryAdapter(obj, ISolrAddHandler, name=pt)
                        if adder is None:
                            adder = DefaultAdder(obj)
                        data["_solr_adder"] = adder
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        next(cpi)
                else:
                    log("missing data, skipping indexing of %r.\n" % obj)
                if limit and count >= (skip + limit):
                    break

        checkPoint()
        conn.commit()
        log("solr index rebuilt.\n")
        msg = "processed %d items in %s (%s cpu time)."
        msg = msg % (processed, next(real), next(cpu))
        log(msg)
        logger.info(msg)
コード例 #7
0
    def reindex(self, batch=1000, skip=0, limit=0, ignore_portal_types=None,
                only_portal_types=None):
        """ find all contentish objects (meaning all objects derived from one
            of the catalog mixin classes) and (re)indexes them """

        if ignore_portal_types and only_portal_types:
            raise ValueError("It is not possible to combine "
                             "ignore_portal_types with only_portal_types")

        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log('reindexing solr catalog...\n')
        if skip:
            log('skipping indexing of %d object(s)...\n' % skip)
        if limit:
            log('limiting indexing to %d object(s)...\n' % limit)
        real = timer()          # real time
        lap = timer()           # real lap time (for intermediate commits)
        cpu = timer(clock)      # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}            # list to hold data to be updated
        flush = lambda: conn.flush()
        flush = notimeout(flush)

        def checkPoint():
            for my_boost_values, data in updates.values():
                adder = data.pop('_solr_adder')
                adder(conn, boost_values=my_boost_values, **data)
            updates.clear()
            msg = 'intermediate commit (%d items processed, ' \
                  'last batch in %s)...\n' % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()
        cpi = checkpointIterator(checkPoint, batch)
        count = 0
        for path, obj in findObjects(self.context):
            if ICheckIndexable(obj)():
                count += 1
                if count <= skip:
                    continue

                if ignore_portal_types:
                    if obj.portal_type in ignore_portal_types:
                        continue

                if only_portal_types:
                    if obj.portal_type not in only_portal_types:
                        continue

                data, missing = proc.getData(obj)
                prepareData(data)
                if not missing:
                    value = data.get(key, None)
                    if value is not None:
                        log('indexing %r\n' % obj)
                        pt = data.get('portal_type', 'default')
                        adder = queryAdapter(obj, ISolrAddHandler, name=pt)
                        if adder is None:
                            adder = DefaultAdder(obj)
                        data['_solr_adder'] = adder
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        cpi.next()
                else:
                    log('missing data, skipping indexing of %r.\n' % obj)
                if limit and count >= (skip + limit):
                    break

        checkPoint()
        conn.commit()
        log('solr index rebuilt.\n')
        msg = 'processed %d items in %s (%s cpu time).'
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)