def update_index(*args):
    """
    Index one batch of un-indexed posts and flag them as indexed.

    At most ``settings.BATCH_INDEXING_SIZE`` posts are handled per call; how
    often the task runs is decided by whatever schedules it, not by this
    function.

    The batch is flagged ``indexed=True`` before the search index is written.
    If indexing raises, the error is reported through ``message`` and the flag
    is cleared again on exactly the same posts so a later run retries them.

    Positional arguments are accepted and ignored.
    """
    from biostar.forum.models import Post
    from biostar.forum import search
    from django.conf import settings

    # Get un-indexed posts.
    posts = Post.objects.filter(indexed=False)[:settings.BATCH_INDEXING_SIZE]

    # Pin the primary keys of this batch down once. Iterating evaluates and
    # caches the queryset, so `posts` keeps referring to these same rows.
    # Re-deriving the ids later from the lazy `indexed=False` query would
    # select a different batch once the flag below has been flipped.
    ids = [post.id for post in posts]

    # Nothing to be done.
    if not ids:
        message("No new posts found")
        return

    message(f"Indexing {len(ids)} posts.")

    # Update indexed field on posts.
    Post.objects.filter(id__in=ids).update(indexed=True)

    try:
        search.index_posts(posts=posts)
        message(f"Updated search index with {len(ids)} posts.")
    except Exception as exc:
        # Best-effort task: report the failure and undo the flag on the very
        # same posts instead of propagating the error to the scheduler.
        message(f'Error updating index: {exc}')
        Post.objects.filter(id__in=ids).update(indexed=False)

    return
def build_spam_index(overwrite=False, add_ham=False, limit=500):
    """
    Build the spam index from spam posts and, optionally, a sample of ham.

    overwrite: forwarded to ``search.index_posts`` as ``overwrite``.
    add_ham: when true, a random sample of ids from
        ``Post.objects.valid_posts()`` is indexed alongside the spam.
    limit: maximum number of spam posts taken (lowest primary keys first);
        also passed to ``sizer`` as ``size`` to decide the ham sample size.

    Returns the index object obtained from ``bootstrap_index()``.
    """
    # Ids of the first `limit` posts flagged as spam, ordered by primary key.
    spam_query = Post.objects.filter(spam=Post.SPAM).order_by("pk")[:limit]
    spam_ids = list(spam_query.values_list("id", flat=True))

    ham_ids = []
    if add_ham:
        # Every valid post id is loaded, then a random subset is kept.
        # NOTE(review): sizer presumably caps the sample at len(ids) - confirm.
        valid_ids = list(Post.objects.valid_posts().values_list("id", flat=True))
        sample_size = sizer(valid_ids, size=limit)
        ham_ids = random.sample(valid_ids, k=sample_size)

    selected = Post.objects.filter(id__in=chain(spam_ids, ham_ids))

    # Initialize the spam index, then batch index the selected posts into it.
    spam_ix = bootstrap_index()
    search.index_posts(
        posts=selected,
        ix=spam_ix,
        overwrite=overwrite,
        add_func=add_post_to_index,
    )

    logger.info("Built spam index.")
    return spam_ix
def handle(self, *args, **options):
    """
    Reset, extend and/or report on the search index, driven by ``options``.

    NOTE(review): presumably the ``handle`` of a Django management command;
    the enclosing class is not visible here.

    All four keys below must be present in ``options``:
        reset: when truthy, clear the indexed flag on every valid post that
            has a root and is currently flagged as indexed.
        index: when truthy, the maximum number of un-indexed posts to add to
            the search index in this run.
        remove: forwarded to ``search.index_posts`` as ``overwrite``.
        report: when truthy, call ``search.print_info()``.
    """
    logger.info(f"Database: {settings.DATABASE_NAME}")

    reset, remove = options['reset'], options['remove']
    report, index = options['report'], options['index']

    def pending():
        # Fresh queryset: valid posts with a root that are not indexed yet.
        return Post.objects.valid_posts(indexed=False).exclude(root=None)

    # Sets the un-indexed flags to false on all posts.
    if reset:
        logger.info("Setting indexed field to false on all post.")
        flagged = Post.objects.valid_posts(indexed=True).exclude(root=None)
        flagged.update(indexed=False)

    # Index a limited number of yet un-indexed posts.
    if index:
        logger.info(f"Starting with {pending().count()} unindexed posts")

        batch = pending()[:index]

        # Collect the ids before indexing; this also evaluates the batch so
        # the same rows are indexed and then flagged.
        batch_ids = [post.id for post in batch]
        logger.info(f"Indexing {len(batch_ids)} posts")

        # Add the posts to the search index, then mark them as indexed.
        search.index_posts(posts=batch, overwrite=remove)
        Post.objects.filter(id__in=batch_ids).update(indexed=True)

        logger.info(f"Finished with {pending().count()} unindexed posts remaining")

    # Report the contents of the index.
    if report:
        search.print_info()
def build(size, remove=False):
    """
    Index a batch of top-level posts and drop pending spam from the index.

    size: maximum number of posts handled in each of the two phases.
    remove: forwarded to ``search.index_posts`` as ``overwrite``.

    Phase one indexes up to ``size`` valid, top-level, rooted posts whose
    indexed flag is unset and then sets the flag. Phase two takes up to
    ``size`` spam posts whose indexed flag is unset, removes each from the
    search index and sets the flag on them as well.
    """

    def pending_toplevel():
        # Fresh queryset: valid top-level posts with a root, not yet indexed.
        candidates = Post.objects.valid_posts(indexed=False, is_toplevel=True)
        return candidates.exclude(root=None)

    # Phase one: add un-indexed top-level posts to the search index.
    batch = pending_toplevel()[:size]

    # Collecting the ids evaluates the batch before anything is modified.
    batch_ids = [post.id for post in batch]
    indexed_total = len(batch_ids)

    search.index_posts(posts=batch, overwrite=remove)

    # Set the indexed field to true on exactly the posts just indexed.
    Post.objects.filter(id__in=batch_ids).update(indexed=True)

    remaining = pending_toplevel().count()
    logger.info(
        f"Indexed {indexed_total} posts, {remaining} unindexed posts remaining")

    # Phase two: spam posts whose indexed flag is still unset.
    spam_batch = Post.objects.filter(spam=Post.SPAM, indexed=False)[:size]
    spam_ids = [post.id for post in spam_batch]

    # Remove spam from the search index, one post at a time.
    for spam_post in spam_batch:
        search.remove_post(post=spam_post)

    # Flag the handled spam so it is not picked up again by this query.
    Post.objects.filter(id__in=spam_ids).update(indexed=True)

    logger.info(f"Removed {len(spam_ids)} spam posts from index")