예제 #1
0
def update_index(*args):
    """
    Index one batch of un-indexed posts.

    Fetches up to ``settings.BATCH_INDEXING_SIZE`` posts whose ``indexed``
    flag is False, flags them as indexed, and passes them to
    ``search.index_posts``. If indexing raises, the error is reported via
    ``message`` and the flag is cleared again on exactly the same posts so
    that a later run can pick them up.

    Positional arguments are accepted and ignored. Returns None.
    """
    from biostar.forum.models import Post
    from biostar.forum import search
    from django.conf import settings

    # Get un-indexed posts
    posts = Post.objects.filter(indexed=False)[:settings.BATCH_INDEXING_SIZE]

    # Pin the batch to concrete ids. Using the sliced queryset itself as an
    # `id__in` subquery would re-run "indexed=False LIMIT n" on every use, so
    # after the flag flips the rollback below would target different posts.
    ids = [post.id for post in posts]

    # Nothing to be done.
    if not ids:
        message("No new posts found")
        return

    message(f"Indexing {len(ids)} posts.")

    # Flag the batch before indexing starts.
    Post.objects.filter(id__in=ids).update(indexed=True)

    try:
        # `posts` was evaluated by the comprehension above, so the rows handed
        # to the indexer are the same ones whose ids were recorded.
        search.index_posts(posts=posts)
        message(f"Updated search index with {len(ids)} posts.")
    except Exception as exc:
        message(f'Error updating index: {exc}')
        # Roll the flag back on the posts that failed, and only on those.
        Post.objects.filter(id__in=ids).update(indexed=False)
예제 #2
0
def build_spam_index(overwrite=False, add_ham=False, limit=500):
    """
    Build the spam index from spam posts and, optionally, sampled ham posts.

    overwrite -- forwarded to search.index_posts.
    add_ham   -- when truthy, a random sample of ids from
                 Post.objects.valid_posts() is indexed alongside the spam.
    limit     -- maximum number of spam posts (lowest pk first); also passed
                 to sizer() as ``size`` when choosing the ham sample size.

    Returns the index object produced by bootstrap_index().
    """
    # Ids of the first `limit` spam posts, ordered by primary key.
    spam_ids = list(
        Post.objects.filter(spam=Post.SPAM)
        .order_by("pk")[:limit]
        .values_list("id", flat=True)
    )

    # Optionally draw a random sample of ham ids.
    ham_ids = []
    if add_ham:
        candidates = list(Post.objects.valid_posts().values_list("id", flat=True))
        ham_ids = random.sample(candidates, k=sizer(candidates, size=limit))

    selected = Post.objects.filter(id__in=chain(spam_ids, ham_ids))

    # Initialize the spam index
    spam_index = bootstrap_index()

    # Batch index the selected posts into the spam index.
    search.index_posts(
        posts=selected,
        ix=spam_index,
        overwrite=overwrite,
        add_func=add_post_to_index,
    )

    logger.info("Built spam index.")
    return spam_index
예제 #3
0
    def handle(self, *args, **options):
        """
        Run the requested index maintenance steps, in order: reset, index, report.

        All four options are read up front:
            reset  -- when truthy, set ``indexed`` to False on every post from
                      valid_posts(indexed=True) that has a root.
            index  -- when truthy, used as the upper slice bound on the posts
                      indexed in this run.
            remove -- forwarded to search.index_posts as ``overwrite``.
            report -- when truthy, call search.print_info().
        """
        logger.info(f"Database: {settings.DATABASE_NAME}")

        reset, remove, report, index = (
            options[name] for name in ('reset', 'remove', 'report', 'index')
        )

        def unindexed():
            # Fresh queryset of not-yet-indexed posts that have a root.
            return Post.objects.valid_posts(indexed=False).exclude(root=None)

        # Clear the indexed flag on posts currently marked as indexed.
        if reset:
            logger.info(f"Setting indexed field to false on all post.")
            flagged = Post.objects.valid_posts(indexed=True).exclude(root=None)
            flagged.update(indexed=False)

        # Index a bounded batch of the posts still waiting.
        if index:
            logger.info(f"Starting with {unindexed().count()} unindexed posts")

            batch = unindexed()[:index]

            # Record the ids now; they select the rows to flag afterwards.
            ids = [post.id for post in batch]
            logger.info(f"Indexing {len(ids)} posts")

            # Add the batch to the search index, then mark it as indexed.
            search.index_posts(posts=batch, overwrite=remove)
            Post.objects.filter(id__in=ids).update(indexed=True)

            remaining = unindexed().count()
            logger.info(f"Finished with {remaining} unindexed posts remaining")

        # Report the contents of the index
        if report:
            search.print_info()
def build(size, remove=False):
    """
    Index a batch of top-level posts, then drop a batch of spam from the index.

    size   -- upper slice bound for both the posts to index and the spam
              posts to remove.
    remove -- forwarded to search.index_posts as ``overwrite``.

    Both batches are selected with ``indexed=False`` and are flagged
    ``indexed=True`` once processed.
    """

    def pending():
        # Fresh queryset of un-indexed top-level posts that have a root.
        return Post.objects.valid_posts(
            indexed=False, is_toplevel=True).exclude(root=None)

    batch = pending()[:size]

    # Record the ids now; they select the rows to flag afterwards.
    ids = [post.id for post in batch]
    indexed_total = len(ids)

    # Add the batch to the search index, then mark it as indexed.
    search.index_posts(posts=batch, overwrite=remove)
    Post.objects.filter(id__in=ids).update(indexed=True)

    remaining = pending().count()
    logger.info(
        f"Indexed {indexed_total} posts, {remaining} unindexed posts remaining")

    # Spam posts still flagged as un-indexed are removed from the search index.
    spam_batch = list(Post.objects.filter(spam=Post.SPAM, indexed=False)[:size])
    spam_ids = [post.id for post in spam_batch]

    for spam_post in spam_batch:
        search.remove_post(post=spam_post)

    # Flag the processed spam so it is not selected again.
    Post.objects.filter(id__in=spam_ids).update(indexed=True)

    logger.info(f"Removed {len(spam_ids)} spam posts from index")