def reindex(self, batch=1000, skip=0):
     """ find all contentish objects (meaning all objects derived from one
         of the catalog mixin classes) and (re)indexes them """
     requestFactory = queryUtility(IRequestFactory)
     indexProcessor = queryUtility(IZeroCMSIndexQueueProcessor, name="zerocms")
     zodb_conn = self.context._p_jar
     log = self.mklog()
     log('reindexing documents to ZeroCMS...\n')
     if skip:
         log('skipping indexing of %d object(s)...\n' % skip)
     real = timer()          # real time
     lap = timer()           # real lap time (for intermediate commits)
     cpu = timer(clock)      # cpu time
     processed = 0
     updates = {}            # list to hold data to be updated
     count = 0
     for path, obj in findObjects(self.context):
         if indexable(obj):
             if getOwnIndexMethod(obj, 'indexObject') is not None:
                 log('skipping indexing of %r via private method.\n' % obj)
                 continue
             count += 1
             if count <= skip:
                 continue
             indexProcessor.index(obj)
             processed += 1
     zodb_conn.cacheGC();
     log('All documents exported to ZeroCMS.\n')
     msg = 'processed %d items in %s (%s cpu time).'
     msg = msg % (processed, real.next(), cpu.next())
     log(msg)
     logger.info(msg)
    def reindex(self, batch=1000, skip=0):
        """ find all contentish objects (meaning all objects derived from one
            of the catalog mixin classes) and (re)indexes them """
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log('reindexing solr catalog...\n')
        if skip:
            log('skipping indexing of %d object(s)...\n' % skip)
        real = timer()  # real time
        lap = timer()  # real lap time (for intermediate commits)
        cpu = timer(clock)  # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}  # list to hold data to be updated
        flush = lambda: conn.flush()
        flush = notimeout(flush)

        def checkPoint():
            for boost_values, data in updates.values():
                conn.add(boost_values=boost_values, **data)
            updates.clear()
            msg = 'intermediate commit (%d items processed, ' \
                  'last batch in %s)...\n' % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()

        cpi = checkpointIterator(checkPoint, batch)
        count = 0
        for path, obj in findObjects(self.context):
            if indexable(obj):
                if getOwnIndexMethod(obj, 'indexObject') is not None:
                    log('skipping indexing of %r via private method.\n' % obj)
                    continue
                count += 1
                if count <= skip:
                    continue
                data, missing = proc.getData(obj)
                prepareData(data)
                if not missing:
                    value = data.get(key, None)
                    if value is not None:
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        cpi.next()
                else:
                    log('missing data, skipping indexing of %r.\n' % obj)
        checkPoint()
        conn.commit()
        log('solr index rebuilt.\n')
        msg = 'processed %d items in %s (%s cpu time).'
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)
 def testFindCommentObjects(self):
     """Check that comments are also found"""
     comment_id = self._add_comment('lala')
     found = list(findObjects(self.layer['portal'].news))
     self.assertIn(
         '++conversation++default/{0}'.format(comment_id),
         self.ids(found)
     )
Exemple #4
0
    def reindex(self, batch=1000, skip=0):
        """ find all contentish objects (meaning all objects derived from one
            of the catalog mixin classes) and (re)indexes them """
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log("reindexing solr catalog...\n")
        if skip:
            log("skipping indexing of %d object(s)...\n" % skip)
        real = timer()  # real time
        lap = timer()  # real lap time (for intermediate commits)
        cpu = timer(clock)  # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}  # list to hold data to be updated
        flush = lambda: conn.flush()
        flush = notimeout(flush)

        def checkPoint():
            for boost_values, data in updates.values():
                conn.add(boost_values=boost_values, **data)
            updates.clear()
            msg = "intermediate commit (%d items processed, " "last batch in %s)...\n" % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()

        cpi = checkpointIterator(checkPoint, batch)
        count = 0
        for path, obj in findObjects(self.context):
            if indexable(obj):
                if getOwnIndexMethod(obj, "indexObject") is not None:
                    log("skipping indexing of %r via private method.\n" % obj)
                    continue
                count += 1
                if count <= skip:
                    continue
                data, missing = proc.getData(obj)
                prepareData(data)
                if not missing:
                    value = data.get(key, None)
                    if value is not None:
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        cpi.next()
                else:
                    log("missing data, skipping indexing of %r.\n" % obj)
        checkPoint()
        conn.commit()
        log("solr index rebuilt.\n")
        msg = "processed %d items in %s (%s cpu time)."
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)
    def reindex(self, batch=1000, skip=0, limit=0):
        """ find all contentish objects (meaning all objects derived from one
            of the catalog mixin classes) and (re)indexes them """
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log('reindexing solr catalog...\n')
        if skip:
            log('skipping indexing of %d object(s)...\n' % skip)
        if limit:
            log('limiting indexing to %d object(s)...\n' % limit)
        real = timer()          # real time
        lap = timer()           # real lap time (for intermediate commits)
        cpu = timer(clock)      # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}            # list to hold data to be updated
        flush = lambda: conn.flush()
        flush = notimeout(flush)

        def checkPoint():
            for boost_values, data in updates.values():
                adder = data.pop('_solr_adder')
                adder(conn, boost_values=boost_values, **data)
            updates.clear()
            msg = 'intermediate commit (%d items processed, ' \
                  'last batch in %s)...\n' % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()
        cpi = checkpointIterator(checkPoint, batch)
        count = 0
        for path, obj in findObjects(self.context):
            if ICheckIndexable(obj)():
                count += 1
                if count <= skip:
                    continue
                data, missing = proc.getData(obj)
                prepareData(data)
                if not missing:
                    value = data.get(key, None)
                    if value is not None:
                        log('indexing %r\n' % obj)
                        pt = data.get('portal_type', 'default')
                        adder = queryAdapter(obj, ISolrAddHandler, name=pt)
                        if adder is None:
                            adder = DefaultAdder(obj)
                        data['_solr_adder'] = adder
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        cpi.next()
                else:
                    log('missing data, skipping indexing of %r.\n' % obj)
                if limit and count >= (skip + limit):
                    break
        checkPoint()
        conn.commit()
        log('solr index rebuilt.\n')
        msg = 'processed %d items in %s (%s cpu time).'
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)
    def reindex(
        self,
        batch=1000,
        skip=0,
        limit=0,
        ignore_portal_types=None,
        only_portal_types=None,
        idxs=[],
        ignore_exceptions=False,
    ):
        """find all contentish objects (meaning all objects derived from one
        of the catalog mixin classes) and (re)indexes them"""

        if ignore_portal_types and only_portal_types:
            raise ValueError("It is not possible to combine "
                             "ignore_portal_types with only_portal_types")

        atomic = idxs != []
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log("reindexing solr catalog...\n")
        if skip:
            log("skipping indexing of %d object(s)...\n" % skip)
        if limit:
            log("limiting indexing to %d object(s)...\n" % limit)
        real = timer()  # real time
        lap = timer()  # real lap time (for intermediate commits)
        cpu = timer(process_time)  # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}  # list to hold data to be updated

        def flush():
            return conn.commit(soft=True)

        flush = notimeout(flush)

        def checkPoint():
            for my_boost_values, data in updates.values():
                adder = data.pop("_solr_adder")
                try:
                    adder(conn, boost_values=my_boost_values, **data)
                except Exception as e:
                    logger.warning("Error %s @ %s", e, data["path_string"])
                    if not ignore_exceptions:
                        raise
            updates.clear()
            msg = ("intermediate commit (%d items processed, "
                   "last batch in %s)...\n" % (processed, next(lap)))
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()

        cpi = checkpointIterator(checkPoint, batch)
        count = 0

        if atomic:
            log("indexing only {0} \n".format(idxs))

        for path, obj in findObjects(self.context):
            if ICheckIndexable(obj)():
                if getOwnIndexMethod:
                    if getOwnIndexMethod(obj, "indexObject") is not None:
                        log("skipping indexing of %r via private method.\n" %
                            obj)
                        continue

                count += 1
                if count <= skip:
                    continue

                if ignore_portal_types:
                    if obj.portal_type in ignore_portal_types:
                        continue

                if only_portal_types:
                    if obj.portal_type not in only_portal_types:
                        continue

                attributes = None
                if atomic:
                    attributes = idxs

                # For atomic updates to work the uniqueKey must be present
                # in *every* update operation.
                if attributes and key not in attributes:
                    attributes.append(key)
                data, missing = proc.getData(obj, attributes=attributes)
                prepareData(data)

                if not missing or atomic:
                    value = data.get(key, None)
                    if value is not None:
                        log("indexing %r\n" % obj)

                        pt = data.get("portal_type", "default")
                        adder = queryAdapter(obj, ISolrAddHandler, name=pt)
                        if adder is None:
                            adder = DefaultAdder(obj)
                        data["_solr_adder"] = adder
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        next(cpi)
                else:
                    log("missing data, skipping indexing of %r.\n" % obj)
                if limit and count >= (skip + limit):
                    break

        checkPoint()
        conn.commit()
        log("solr index rebuilt.\n")
        msg = "processed %d items in %s (%s cpu time)."
        msg = msg % (processed, next(real), next(cpu))
        log(msg)
        logger.info(msg)
 def testFindObjects(self):
     found = list(findObjects(self.portal))
     # the starting point itself is returned
     self.assertEqual(found[0], ('', self.portal))
     # but the rest should be the same...
     self.assertEqual(self.ids(found[1:]), self.good)
Exemple #8
0
class SolrMaintenanceView(BrowserView):
    """ helper view for indexing all portal content in Solr """
    def mklog(self, use_std_log=False):
        """ helper to prepend a time stamp to the output """
        write = self.request.RESPONSE.write

        def log(msg, timestamp=True):
            if timestamp:
                msg = strftime('%Y/%m/%d-%H:%M:%S ') + msg
            write(msg)
            if use_std_log:
                logger.info(msg)

        return log

    def optimize(self):
        """ optimize solr indexes """
        manager = queryUtility(ISolrConnectionManager)
        conn = manager.getConnection()
        conn.setTimeout(None)
        conn.commit(optimize=True)
        return 'solr indexes optimized.'

    def clear(self):
        """ clear all data from solr, i.e. delete all indexed objects """
        manager = queryUtility(ISolrConnectionManager)
        uniqueKey = manager.getSchema().uniqueKey
        conn = manager.getConnection()
        conn.setTimeout(None)
        conn.deleteByQuery('%s:[* TO *]' % uniqueKey)
        conn.commit()
        return 'solr index cleared.'

    def reindex(self,
                batch=1000,
                skip=0,
                limit=0,
                ignore_portal_types=None,
                only_portal_types=None,
                idxs=[],
                ignore_exceptions=False):
        """ find all contentish objects (meaning all objects derived from one
            of the catalog mixin classes) and (re)indexes them """

        if ignore_portal_types and only_portal_types:
            raise ValueError("It is not possible to combine "
                             "ignore_portal_types with only_portal_types")

        atomic = idxs != []
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log('reindexing solr catalog...\n')
        if skip:
            log('skipping indexing of %d object(s)...\n' % skip)
        if limit:
            log('limiting indexing to %d object(s)...\n' % limit)
        real = timer()  # real time
        lap = timer()  # real lap time (for intermediate commits)
        cpu = timer(clock)  # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}  # list to hold data to be updated
        flush = lambda: conn.commit(soft=True)
        flush = notimeout(flush)

        def checkPoint():
            for my_boost_values, data in updates.values():
                adder = data.pop('_solr_adder')
                try:
                    adder(conn, boost_values=my_boost_values, **data)
                except Exception, e:
                    logger.warn('Error %s @ %s', e, data['path_string'])
                    if not ignore_exceptions:
                        raise
            updates.clear()
            msg = 'intermediate commit (%d items processed, ' \
                  'last batch in %s)...\n' % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()

        cpi = checkpointIterator(checkPoint, batch)
        count = 0

        if atomic:
            log('indexing only {0} \n'.format(idxs))

        for path, obj in findObjects(self.context):
            if ICheckIndexable(obj)():
                count += 1
                if count <= skip:
                    continue

                if ignore_portal_types:
                    if obj.portal_type in ignore_portal_types:
                        continue

                if only_portal_types:
                    if obj.portal_type not in only_portal_types:
                        continue

                attributes = None
                if atomic:
                    attributes = idxs

                # For atomic updates to work the uniqueKey must be present
                # in *every* update operation.
                if attributes and key not in attributes:
                    attributes.append(key)
                data, missing = proc.getData(obj, attributes=attributes)
                prepareData(data)

                if not missing or atomic:
                    value = data.get(key, None)
                    if value is not None:
                        log('indexing %r\n' % obj)

                        pt = data.get('portal_type', 'default')
                        adder = queryAdapter(obj, ISolrAddHandler, name=pt)
                        if adder is None:
                            adder = DefaultAdder(obj)
                        data['_solr_adder'] = adder
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        cpi.next()
                else:
                    log('missing data, skipping indexing of %r.\n' % obj)
                if limit and count >= (skip + limit):
                    break

        checkPoint()
        conn.commit()
        log('solr index rebuilt.\n')
        msg = 'processed %d items in %s (%s cpu time).'
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)
Exemple #9
0
 def testFindObjects(self):
     found = list(findObjects(self.portal))
     # the starting point itself is returned
     self.assertEqual(found[0], ('', self.portal))
     # but the rest should be the same...
     self.assertEqual(self.ids(found[1:]), self.good)
Exemple #10
0
def solr_dump_catalog(app, args):
    """Dumps the catalog and metadata contents into a nested directory
    structure full of pickles containing the information in dict format.
    These can be updated by later re-runs and used to import the data via the
    `update_solr` command. You can optionally specify the id of the Plone site
    as the first command line argument.
    """
    _enable_log()
    db = app._p_jar.db()
    from Testing import makerequest
    root = makerequest.makerequest(app)
    site = _get_site(root, args)
    data_dir = _data_dir(site.getId())
    _make_dir(data_dir)

    catalog = site.portal_catalog
    _catalog = catalog._catalog
    catalog_length = len(catalog)
    uids_get = _catalog.uids.get

    conn = _solr_connection()
    schema = conn.getSchema()
    wanted = set(schema.keys())
    # We need the data from path
    wanted.add('path')
    conn.close()

    logger.info('Process %s catalog items' % catalog_length)

    from collective.indexing.indexer import getOwnIndexMethod
    from collective.solr.indexer import indexable
    from collective.solr.utils import findObjects
    from plone.app.folder.nogopip import GopipIndex
    from Products.PluginIndexes.DateIndex.DateIndex import DateIndex
    from Products.PluginIndexes.DateRangeIndex import DateRangeIndex
    from Products.ZCTextIndex import WidCode
    from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex

    indexes = catalog.indexes()
    indexes.sort()

    gopip_indexes = set()
    for indexname in indexes:
        if indexname in _catalog.schema:
            # There's no need to get metadata from the indexes
            continue
        if indexname not in wanted:
            # skip indexes not present in the Solr schema
            continue
        logger.info('Dumping index: %s' % indexname)
        index = _catalog.getIndex(indexname)
        if isinstance(index, DateRangeIndex.DateRangeIndex):
            # Solr cannot deal with range indexes directly
            continue
        if isinstance(index, ZCTextIndex):
            get_word = index.getLexicon().get_word
            wid_decode = WidCode.decode
            batch = 0
            for i, (uid, value) in enumerate(index.index._docwords.items()):
                batch = _log_batch(db, batch, i)
                words = ' '.join([get_word(w) for w in wid_decode(value)])
                _dump(data_dir, uid, {indexname: words})
        elif isinstance(index, GopipIndex):
            # happens last as it needs a full site traversal
            gopip_indexes.add(indexname)
            continue
        elif not hasattr(index, '_unindex'):
            logger.warn("Unsupported index '%s' without an _unindex." %
                indexname)
        else:
            date_index = isinstance(index, DateIndex)
            batch = 0
            for i, (uid, value) in enumerate(index._unindex.iteritems()):
                batch = _log_batch(db, batch, i)
                value = _convert_value(indexname, value, date_index)
                if value:
                    _dump(data_dir, uid, {indexname: value})

    # dump metadata
    logger.info('Dumping metadata records')
    batch = 0
    for i, uid in enumerate(_catalog.paths.iterkeys()):
        batch = _log_batch(db, batch, i)
        values = {}
        for k, v in _catalog.getMetadataForRID(uid).iteritems():
            definition = schema.get(k)
            if not definition:
                continue
            class_ = definition['class_']
            date_index = class_ == 'solr.TrieDateField'
            value = _convert_value(k, v, date_index)
            if value is not None:
                values[k] = value
            elif class_ == 'solr.TextField':
                values[k] = ''
        _dump(data_dir, uid, values)

    # deal with GopipIndexes
    batch = 0
    logger.info('Traversing site to dump Gopip index information')
    for i, (path, obj) in enumerate(findObjects(site)):
        batch = _log_batch(db, batch, i)
        if not indexable(obj):
            continue
        elif getOwnIndexMethod(obj, 'indexObject') is not None:
            continue
        parent = aq_parent(obj)
        uid = uids_get('/'.join(obj.getPhysicalPath()), None)
        if uid is None:
            continue
        if hasattr(aq_base(parent), 'getObjectPosition'):
            pos = parent.getObjectPosition(path.split('/')[-1])
            data = {}
            for name in gopip_indexes:
                data[name] = pos
            _dump(data_dir, uid, data)
        else:
            data = {}
            for name in gopip_indexes:
                data[name] = 0
            _dump(data_dir, uid, data)
        if not getattr(aq_base(obj), 'isPrincipiaFolderish', False):
            # Remove non-folders from the cache immediately as we no longer
            # need them
            obj._p_deactivate()
Exemple #11
0
    def reindex(self, batch=1000, skip=0, limit=0, ignore_portal_types=None,
                only_portal_types=None):
        """ find all contentish objects (meaning all objects derived from one
            of the catalog mixin classes) and (re)indexes them """

        if ignore_portal_types and only_portal_types:
            raise ValueError("It is not possible to combine "
                             "ignore_portal_types with only_portal_types")

        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log('reindexing solr catalog...\n')
        if skip:
            log('skipping indexing of %d object(s)...\n' % skip)
        if limit:
            log('limiting indexing to %d object(s)...\n' % limit)
        real = timer()          # real time
        lap = timer()           # real lap time (for intermediate commits)
        cpu = timer(clock)      # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}            # list to hold data to be updated
        flush = lambda: conn.flush()
        flush = notimeout(flush)

        def checkPoint():
            for my_boost_values, data in updates.values():
                adder = data.pop('_solr_adder')
                adder(conn, boost_values=my_boost_values, **data)
            updates.clear()
            msg = 'intermediate commit (%d items processed, ' \
                  'last batch in %s)...\n' % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()
        cpi = checkpointIterator(checkPoint, batch)
        count = 0
        for path, obj in findObjects(self.context):
            if ICheckIndexable(obj)():
                count += 1
                if count <= skip:
                    continue

                if ignore_portal_types:
                    if obj.portal_type in ignore_portal_types:
                        continue

                if only_portal_types:
                    if obj.portal_type not in only_portal_types:
                        continue

                data, missing = proc.getData(obj)
                prepareData(data)
                if not missing:
                    value = data.get(key, None)
                    if value is not None:
                        log('indexing %r\n' % obj)
                        pt = data.get('portal_type', 'default')
                        adder = queryAdapter(obj, ISolrAddHandler, name=pt)
                        if adder is None:
                            adder = DefaultAdder(obj)
                        data['_solr_adder'] = adder
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        cpi.next()
                else:
                    log('missing data, skipping indexing of %r.\n' % obj)
                if limit and count >= (skip + limit):
                    break

        checkPoint()
        conn.commit()
        log('solr index rebuilt.\n')
        msg = 'processed %d items in %s (%s cpu time).'
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)
Exemple #12
0
    def reindex(self, batch=1000, skip=0, idxs=[]):
        """ find all contentish objects (meaning all objects derived from one
            of the catalog mixin classes) and (re)indexes them """
        atomic = idxs != []
        manager = queryUtility(ISolrConnectionManager)
        proc = FtwSolrIndexProcessor(manager)
        conn = manager.getConnection()
        zodb_conn = self.context._p_jar
        log = self.mklog()
        log('reindexing solr catalog...\n')
        if skip:
            log('skipping indexing of %d object(s)...\n' % skip)
        real = timer()          # real time
        lap = timer()           # real lap time (for intermediate commits)
        cpu = timer(clock)      # cpu time
        processed = 0
        schema = manager.getSchema()
        key = schema.uniqueKey
        updates = {}            # list to hold data to be updated
        flush = lambda: conn.flush()
        flush = notimeout(flush)

        def checkPoint():
            for boost_values, data in updates.values():
                # Only update specified fields by using atomic updates
                conn.add(boost_values=boost_values, **data)
            updates.clear()
            msg = 'intermediate commit (%d items processed, ' \
                  'last batch in %s)...\n' % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()
        cpi = checkpointIterator(checkPoint, batch)
        count = 0
        for path, obj in findObjects(self.context):
            if indexable(obj):
                if getOwnIndexMethod(obj, 'indexObject') is not None:
                    log('skipping indexing of %r via private method.\n' % obj)
                    continue
                count += 1
                if count <= skip:
                    continue

                attributes = None
                if atomic:
                    attributes = idxs

                # For atomic updates to work the uniqueKey must be present
                # in *every* update operation.
                if attributes and not key in attributes:
                    attributes.append(key)

                data, missing = proc.getData(obj, attributes=attributes)
                prepareData(data)

                if not missing or atomic:
                    value = data.get(key, None)
                    if value is not None:
                        updates[value] = (boost_values(obj, data), data)
                        processed += 1
                        cpi.next()
                else:
                    log('missing data, skipping indexing of %r.\n' % obj)
        checkPoint()
        conn.commit()
        log('solr index rebuilt.\n')
        msg = 'processed %d items in %s (%s cpu time).'
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)