def _run_changed(msgs, chan):
    """Inject a batch of changed-item messages and print a timing summary.

    Every message body is decoded with ``strordict_fullname`` into a
    mapping that carries a ``fullname`` and an optional ``boost_only``
    flag.  The fullnames are split into two disjoint sets:

    * ``add``   -- fullnames needing a full update, and
    * ``boost`` -- fullnames for which only boost-only updates were seen.

    A full update for a fullname supersedes any boost-only request for
    the same fullname, regardless of the order they appear in the batch.

    msgs -- sequence of queue messages; each must expose ``body``, and
            the last one's ``delivery_info`` supplies the "remaining"
            figure in the log line.
    chan -- not used by this function.
    """
    start = datetime.now(g.tz)

    # Build a real list rather than using map(): the length is needed
    # below, and on Python 3 map() returns an iterator with no len().
    changed = [strordict_fullname(msg.body) for msg in msgs]

    boost = set()
    add = set()

    # an item can request that only its boost fields be updated,
    # so we need to separate those out
    for item in changed:
        fname = item['fullname']

        if fname in add:
            # we're already going to do all of the work
            continue

        if item.get('boost_only', False):
            boost.add(fname)
        else:
            # a full update replaces any earlier boost-only request
            # for the same fullname
            boost.discard(fname)
            add.add(fname)

    things = Thing._by_fullname(boost | add, data=True, return_dict=True)

    # NOTE(review): inject() is assumed to return elapsed seconds, since
    # its result is formatted with %.2fs below -- confirm.
    boost_time = add_time = 0.0
    if boost:
        boost_time = inject([things[fname] for fname in boost],
                            boost_only=True)
    if add:
        add_time = inject([things[fname] for fname in add])

    totaltime = epoch_seconds(datetime.now(g.tz)) - epoch_seconds(start)

    # An empty batch has no last message to read the queue depth from.
    if msgs:
        remaining = msgs[-1].delivery_info.get('message_count', 'unknown')
    else:
        remaining = 'unknown'

    print("%s: %d messages: %d docs (%.2fs), %d boosts (%.2fs) in %.2fs "
          "(%d duplicates, %s remaining)" % (
              start,
              len(changed),
              len(add), add_time,
              len(boost), boost_time,
              totaltime,
              len(changed) - len(things),
              remaining,
          ))
def _run_changed(msgs, chan):
    """Inject a batch of changed-item messages and print a timing summary.

    Every message body is decoded with ``strordict_fullname`` into a
    mapping that carries a ``fullname`` and an optional ``boost_only``
    flag.  The fullnames are split into two disjoint sets:

    * ``add``   -- fullnames needing a full update, and
    * ``boost`` -- fullnames for which only boost-only updates were seen.

    A full update for a fullname supersedes any boost-only request for
    the same fullname, regardless of the order they appear in the batch.

    msgs -- sequence of queue messages; each must expose ``body``, and
            the last one's ``delivery_info`` supplies the "remaining"
            figure in the log line.
    chan -- not used by this function.
    """
    start = datetime.now(g.tz)

    # Build a real list rather than using map(): the length is needed
    # below, and on Python 3 map() returns an iterator with no len().
    changed = [strordict_fullname(msg.body) for msg in msgs]

    boost = set()
    add = set()

    # an item can request that only its boost fields be updated,
    # so we need to separate those out
    for item in changed:
        fname = item["fullname"]

        if fname in add:
            # we're already going to do all of the work
            continue

        if item.get("boost_only", False):
            boost.add(fname)
        else:
            # a full update replaces any earlier boost-only request
            # for the same fullname
            boost.discard(fname)
            add.add(fname)

    things = Thing._by_fullname(boost | add, data=True, return_dict=True)

    # NOTE(review): inject() is assumed to return elapsed seconds, since
    # its result is formatted with %.2fs below -- confirm.
    boost_time = add_time = 0.0
    if boost:
        boost_time = inject(
            [things[fname] for fname in boost], boost_only=True
        )
    if add:
        add_time = inject([things[fname] for fname in add])

    totaltime = epoch_seconds(datetime.now(g.tz)) - epoch_seconds(start)

    # An empty batch has no last message to read the queue depth from.
    if msgs:
        remaining = msgs[-1].delivery_info.get("message_count", "unknown")
    else:
        remaining = "unknown"

    print(
        "%s: %d messages: %d docs (%.2fs), %d boosts (%.2fs) in %.2fs "
        "(%d duplicates, %s remaining)"
        % (
            start,
            len(changed),
            len(add),
            add_time,
            len(boost),
            boost_time,
            totaltime,
            len(changed) - len(things),
            remaining,
        )
    )
def _run_changed(msgs, chan):
    """Push a batch of changed items to Solr.

    The fullnames named by the messages are looked up as Things.  Only
    instances of ``indexed_types`` are considered; of those, anything
    flagged ``_spam`` or ``_deleted`` is passed to the connection's
    ``delete`` by fullname, and everything else is tokenized and passed
    to ``add``.

    msgs -- queue messages whose ``body`` is decoded by
            ``strordict_fullname``.
    chan -- not used by this function.
    """
    print("changed: Processing %d items" % len(msgs))

    payloads = [strordict_fullname(message.body) for message in msgs]
    wanted = set(payload['fullname'] for payload in payloads)
    fetched = Thing._by_fullname(wanted, data=True, return_dict=False)

    # Partition the indexable things in one pass.
    to_index = []
    to_remove = []
    for thing in fetched:
        if not isinstance(thing, indexed_types):
            continue
        if thing._spam or thing._deleted:
            to_remove.append(thing)
        else:
            to_index.append(thing)

    with SolrConnection() as solr:
        if to_index:
            solr.add(tokenize_things(to_index))
        for thing in to_remove:
            solr.delete(id=thing._fullname)
def _run_changed(msgs, chan):
    """Push a batch of changed items to Solr, ignoring boost-only requests.

    Messages whose decoded body has a truthy ``boost_only`` entry are
    skipped.  The remaining fullnames are looked up as Things.  Only
    instances of ``indexed_types`` are considered; of those, anything
    flagged ``_spam`` or ``_deleted`` is passed to the connection's
    ``delete`` by fullname, and everything else is tokenized and passed
    to ``add``.

    msgs -- queue messages whose ``body`` is decoded by
            ``strordict_fullname``.
    chan -- not used by this function.
    """
    print("changed: Processing %d items" % len(msgs))

    payloads = [strordict_fullname(message.body) for message in msgs]

    # Boost-only messages do not take part in this update.
    wanted = set()
    for payload in payloads:
        if payload.get('boost_only'):
            continue
        wanted.add(payload['fullname'])

    fetched = Thing._by_fullname(wanted, data=True, return_dict=False)

    # Partition the indexable things in one pass.
    to_index = []
    to_remove = []
    for thing in fetched:
        if not isinstance(thing, indexed_types):
            continue
        if thing._spam or thing._deleted:
            to_remove.append(thing)
        else:
            to_index.append(thing)

    with SolrConnection() as solr:
        if to_index:
            solr.add(tokenize_things(to_index))
        for thing in to_remove:
            solr.delete(id=thing._fullname)
def _run_changed(msgs, chan):
    """Inject a batch of changed-item messages and print a summary line.

    Every message body is decoded with ``strordict_fullname`` into a
    mapping that carries a ``fullname`` and an optional ``boost_only``
    flag.  The fullnames are split into two disjoint sets:

    * ``add``   -- fullnames needing a full update, and
    * ``boost`` -- fullnames for which only boost-only updates were seen.

    A full update for a fullname supersedes any boost-only request for
    the same fullname, regardless of the order they appear in the batch.
    The summary is printed before either ``inject`` call is made.

    msgs -- sequence of queue messages; each must expose ``body``, and
            the last one's ``delivery_info`` supplies the "remaining"
            figure in the log line.
    chan -- not used by this function.
    """
    # Build a real list rather than using map(): the length is needed
    # below, and on Python 3 map() returns an iterator with no len().
    changed = [strordict_fullname(msg.body) for msg in msgs]

    boost = set()
    add = set()

    # an item can request that only its boost fields be updated,
    # so we need to separate those out
    for item in changed:
        fname = item['fullname']

        if fname in add:
            # we're already going to do all of the work
            continue

        if item.get('boost_only', False):
            boost.add(fname)
        else:
            # a full update replaces any earlier boost-only request
            # for the same fullname
            boost.discard(fname)
            add.add(fname)

    things = Thing._by_fullname(boost | add, data=True, return_dict=True)

    # An empty batch has no last message to read the queue depth from.
    if msgs:
        remaining = msgs[-1].delivery_info.get('message_count', 'unknown')
    else:
        remaining = 'unknown'

    print("%d messages: %d docs, %d boosts (%d duplicates, %s remaining)" % (
        len(changed),
        len(add),
        len(boost),
        len(changed) - len(things),
        remaining,
    ))

    if boost:
        inject([things[fname] for fname in boost], boost_only=True)
    if add:
        inject([things[fname] for fname in add])
def _run_changed(msgs, chan):
    """Push a batch of changed items to Solr, committing at random.

    The fullnames named by the messages are looked up as Things.  Only
    instances of ``indexed_types`` are considered; of those, anything
    flagged ``_spam`` or ``_deleted`` is passed to the connection's
    ``delete`` by fullname, and everything else is tokenized and passed
    to ``add``.  The ``commit`` flag given to ``add`` is true when a
    fresh ``random()`` draw is below 0.1.

    msgs -- queue messages whose ``body`` is decoded by
            ``strordict_fullname``.
    chan -- not used by this function.
    """
    print("changed: Processing %d items" % len(msgs))

    payloads = [strordict_fullname(message.body) for message in msgs]
    wanted = set(payload['fullname'] for payload in payloads)
    fetched = Thing._by_fullname(wanted, data=True, return_dict=False)

    # Partition the indexable things in one pass.
    to_index = []
    to_remove = []
    for thing in fetched:
        if not isinstance(thing, indexed_types):
            continue
        if thing._spam or thing._deleted:
            to_remove.append(thing)
        else:
            to_index.append(thing)

    # How often should we commit?
    should_commit = random() < 0.1
    if should_commit:
        print("COMMITING!")

    with SolrConnection() as solr:
        if to_index:
            solr.add(tokenize_things(to_index), commit=should_commit)
        for thing in to_remove:
            solr.delete(id=thing._fullname)