def bootstrap_indexing(db):
    """Check the search index mapping and rebuild the index if the check fails.

    Nothing happens when indexing is inactive.  Otherwise the index named
    by the ``elasticsearch_index`` configuration entry is handed to
    ``check_mapping``; when that returns a falsy value,
    ``maybe_create_and_reindex`` is run for that index inside
    ``locked_transaction(db, 1237)``.

    :param db: passed through unchanged to ``locked_transaction``.
    """
    from assembl.indexing import indexing_active
    from assembl.indexing.utils import check_mapping, maybe_create_and_reindex

    if not indexing_active():
        return

    index_name = config.get('elasticsearch_index')
    if check_mapping(index_name):
        # Mapping check passed: nothing to (re)build.
        return

    # NOTE(review): 1237 is presumably a lock identifier shared by every
    # process that may bootstrap the index -- confirm against
    # locked_transaction before changing it.
    with locked_transaction(db, 1237) as session:
        maybe_create_and_reindex(index_name, session)
def reindex_content(content, action='update'):
    """Dispatch a changed model object to the matching index/unindex call.

    This function is called by the after_insert/update/delete sqlalchemy
    events.  It does nothing when indexing is inactive.

    Objects that are indexed themselves (posts, agent profiles, ideas,
    extracts) are indexed or unindexed directly on the object returned by
    ``get_changes()``.  Other objects (usernames, agent statuses, idea
    content links, idea announcements, sentiments) trigger a reindex of
    the object(s) they are attached to.

    :param content: the model instance that was inserted, updated or
        deleted.
    :param action: ``'delete'`` unindexes directly indexed objects; any
        other value goes through the per-type rules below.
    """
    from assembl.models.post import PublicationStates
    from assembl.models import (
        AgentStatusInDiscussion, Post, AgentProfile, Idea, IdeaContentLink,
        IdeaAnnouncement, SentimentOfPost, Extract, Username)

    if not indexing_active():
        return

    directly_indexed = (Post, AgentProfile, Idea, Extract)
    changes = get_changes()

    if action == 'delete' and isinstance(content, directly_indexed):
        changes.unindex_content(content)
        return

    if isinstance(content, AgentProfile):
        changes.index_content(content)
        return

    if isinstance(content, Username):
        # A username change affects the user document and everything
        # the user authored.
        user = content.user
        changes.index_content(user)
        for post in user.posts_created:
            reindex_content(post)
        for extract in user.extracts_created:
            reindex_content(extract)
        return

    if isinstance(content, AgentStatusInDiscussion):
        reindex_content(content.agent_profile)
        return

    # Exact class match on purpose: only index Idea, not Thematic or
    # Question.
    if type(content) is Idea:
        if content.hidden or content.tombstone_date is not None:
            changes.unindex_content(content)
        else:
            changes.index_content(content)
        return

    if isinstance(content, Post):
        searchable = (
            content.publication_state == PublicationStates.PUBLISHED
            and not content.hidden
            and content.tombstone_date is None
            and not content.is_bright_mirror_fiction())
        # The post and its extracts always get the same operation.
        apply_change = (
            changes.index_content if searchable
            else changes.unindex_content)
        apply_change(content)
        for extract in content.extracts:
            apply_change(extract)
        return

    # warning: the Extract check should always stay above the
    # IdeaContentLink check
    if isinstance(content, Extract):
        changes.index_content(content)
        return

    if isinstance(content, IdeaContentLink):
        # A AssemblPost is indexed before any IdeaRelatedPostLink is
        # created, so be sure to reindex content.content if we have a
        # IdeaContentLink
        reindex_content(content.content)
        return

    if isinstance(content, IdeaAnnouncement):
        reindex_content(content.idea)
        return

    if isinstance(content, SentimentOfPost):
        reindex_content(content.post_from_sentiments)
def reindex_content(content, action='update'):
    """Dispatch a changed model object to the matching index/unindex call.

    This function is called by the after_insert/update/delete sqlalchemy
    events.  It does nothing when indexing is inactive.

    Objects that are indexed themselves (posts, agent profiles, ideas,
    extracts) are indexed or unindexed directly on the object returned by
    ``get_changes()``.  Other objects (agent statuses, idea content links,
    idea announcements, sentiments) trigger a reindex of the object they
    are attached to.

    :param content: the model instance that was inserted, updated or
        deleted.
    :param action: ``'delete'`` unindexes directly indexed objects; any
        other value goes through the per-type rules below.
    """
    from assembl.models.post import PublicationStates
    from assembl.models import (
        AgentStatusInDiscussion, Post, AgentProfile, Idea, IdeaContentLink,
        IdeaAnnouncement, SentimentOfPost, Extract)

    if not indexing_active():
        return

    directly_indexed = (Post, AgentProfile, Idea, Extract)
    changes = get_changes()

    if action == 'delete' and isinstance(content, directly_indexed):
        changes.unindex_content(content)
        return

    if isinstance(content, AgentProfile):
        changes.index_content(content)
        return

    if isinstance(content, AgentStatusInDiscussion):
        reindex_content(content.agent_profile)
        return

    # Exact class match on purpose: only index Idea, not Thematic or
    # Question.
    if type(content) is Idea:
        if content.hidden or content.tombstone_date is not None:
            changes.unindex_content(content)
        else:
            changes.index_content(content)
        return

    if isinstance(content, Post):
        searchable = (
            content.publication_state == PublicationStates.PUBLISHED
            and not content.hidden
            and content.tombstone_date is None
            and not content.is_bright_mirror_fiction())
        # The post and its extracts always get the same operation.
        apply_change = (
            changes.index_content if searchable
            else changes.unindex_content)
        apply_change(content)
        for extract in content.extracts:
            apply_change(extract)
        return

    # warning: the Extract check should always stay above the
    # IdeaContentLink check
    if isinstance(content, Extract):
        changes.index_content(content)
        return

    if isinstance(content, IdeaContentLink):
        # A AssemblPost is indexed before any IdeaRelatedPostLink is
        # created, so be sure to reindex content.content if we have a
        # IdeaContentLink
        reindex_content(content.content)
        return

    if isinstance(content, IdeaAnnouncement):
        reindex_content(content.idea)
        return

    if isinstance(content, SentimentOfPost):
        reindex_content(content.post_from_sentiments)
def reindex_content(content, action='update'):
    """Dispatch a changed model object to the matching index/unindex call.

    This function is called by the after_insert/update/delete sqlalchemy
    events.  It does nothing when indexing is inactive.

    Posts, agent profiles and ideas are indexed or unindexed directly on
    the object returned by ``get_changes()``.  Agent statuses, idea
    content links, idea announcements and sentiments trigger a reindex of
    the object they are attached to.

    :param content: the model instance that was inserted, updated or
        deleted.
    :param action: ``'delete'`` unindexes directly indexed objects; any
        other value goes through the per-type rules below.
    """
    from assembl.models.post import PublicationStates
    from assembl.models import (
        AgentStatusInDiscussion, Post, AgentProfile, Idea, IdeaContentLink,
        IdeaAnnouncement, SentimentOfPost)

    if not indexing_active():
        return

    directly_indexed = (Post, AgentProfile, Idea)
    changes = get_changes()

    if action == 'delete' and isinstance(content, directly_indexed):
        changes.unindex_content(content)
        return

    if isinstance(content, AgentProfile):
        changes.index_content(content)
        return

    if isinstance(content, AgentStatusInDiscussion):
        reindex_content(content.agent_profile)
        return

    if isinstance(content, Idea):
        if content.hidden or content.tombstone_date is not None:
            changes.unindex_content(content)
        else:
            changes.index_content(content)
        return

    if isinstance(content, Post):
        # don't index proposition posts (they are not unindexed either)
        if content.type == 'proposition_post':
            return
        searchable = (
            content.publication_state == PublicationStates.PUBLISHED
            and not content.hidden
            and content.tombstone_date is None)
        if searchable:
            changes.index_content(content)
        else:
            changes.unindex_content(content)
        return

    if isinstance(content, IdeaContentLink):
        # A AssemblPost is indexed before any IdeaRelatedPostLink is
        # created, so be sure to reindex content.content if we have a
        # IdeaContentLink
        reindex_content(content.content)
        return

    if isinstance(content, IdeaAnnouncement):
        reindex_content(content.idea)
        return

    if isinstance(content, SentimentOfPost):
        reindex_content(content.post_from_sentiments)
def join_transaction(event):
    """Call ``changes._join()`` when indexing is active.

    :param event: accepted for the caller's sake; not used here.
    """
    if not indexing_active():
        return
    changes._join()
def search_endpoint(context, request):
    """Run the JSON query from the request body and return the annotated hits.

    The query must carry a ``discussion_id`` filter under
    ``query.bool.filter``; the first such filter selects the discussion
    whose READ permission is checked for the current user (``Everyone``
    when nobody is authenticated).

    Each hit of the search result is then modified in place:

    * ``inner_hits`` is removed;
    * ``creator_name`` is added when the hit has a ``creator_id``;
    * ``idea`` hits get ``num_posts`` and ``num_contributors``;
    * ``user`` hits get ``num_posts`` for the discussion.

    :returns: the search result, or an ``HTTPServiceUnavailable``
        response when indexing is inactive.
    :raises HTTPUnauthorized: when the discussion does not exist, or the
        user lacks READ permission on the discussion, on an idea hit or
        on a user hit.
    """
    if not indexing_active():
        return HTTPServiceUnavailable("Indexing inactive")

    query = request.json_body
    # Expected shape:
    # u'query': {u'bool': {u'filter': [{u'term': {u'discussion_id': u'23'}}]}}
    filters = list(query['query']['bool']['filter'])
    filtered_discussion_ids = []
    for clause in filters:
        clause_body = clause.values()[0]
        if 'discussion_id' in clause_body.keys():
            filtered_discussion_ids.append(clause_body.values()[0])
    # Only the first discussion_id filter is used for the access check.
    discussion_id = filtered_discussion_ids[0]

    discussion = models.Discussion.get_instance(discussion_id)
    if discussion is None:
        raise HTTPUnauthorized()

    user_id = authenticated_userid(request) or Everyone
    permissions = get_permissions(user_id, discussion_id)
    if not discussion.user_can(user_id, CrudPermissions.READ, permissions):
        raise HTTPUnauthorized()

    es = connect()
    index_name = get_index_settings()['index_name']
    # Debugging aid: print get_curl_query(query)
    result = es.search(index=index_name, body=query)
    hits = result['hits']['hits']

    # add creator_name in each hit: fetch all creator names in one query
    creator_ids = set()
    for hit in hits:
        hit_creator_id = hit['_source'].get('creator_id', None)
        if hit_creator_id is not None:
            creator_ids.add(hit_creator_id)

    session = get_session_maker()
    creator_rows = session.query(
        models.AgentProfile.id,
        models.AgentProfile.name,
    ).filter(models.AgentProfile.id.in_(creator_ids)).all()
    creators_by_id = dict(creator_rows)

    for hit in hits:
        source = hit['_source']
        creator_id = source.get('creator_id', None)

        # Remove inner_hits key to not leak posts from private discussion.
        # You can easily craft a query to get the participants of a public
        # discussion and do a has_child filter with inner_hits on a
        # private discussion.
        hit.pop('inner_hits', None)

        if creator_id is not None:
            source['creator_name'] = creators_by_id.get(creator_id)

        hit_type = hit['_type']
        if hit_type == 'idea':
            idea = models.Idea.get_instance(source['id'])
            # The check is not really necessary because it's the same
            # 'read' permission as the discussion, but it doesn't cost
            # anything to check it and the READ permission may change in
            # the future.
            if not idea.user_can(user_id, CrudPermissions.READ, permissions):
                raise HTTPUnauthorized()
            source['num_posts'] = idea.num_posts
            source['num_contributors'] = idea.num_contributors
        elif hit_type == 'user':
            agent_profile = models.AgentProfile.get_instance(source['id'])
            if not agent_profile.user_can(
                    user_id, CrudPermissions.READ, permissions):
                raise HTTPUnauthorized()
            source['num_posts'] = agent_profile.count_posts_in_discussion(
                discussion_id)
        # 'synthesis' and 'post' hits are deliberately not checked one by
        # one: that would take an extra request per hit, and they
        # currently use the same 'read' permission as the discussion.

    return result
def search_endpoint(context, request):
    """Run the JSON query from the request body and return the annotated hits.

    The query must carry a ``discussion_id`` filter under
    ``query.bool.filter``; the first such filter selects the discussion
    whose READ permission is checked for the current user (``Everyone``
    when nobody is authenticated).

    Each hit of the search result is then modified in place:

    * ``inner_hits`` is removed;
    * ``creator_name`` is added when the hit has a ``creator_id``;
    * ``idea`` hits get ``num_posts`` and ``num_contributors``;
    * ``user`` hits get ``num_posts`` for the discussion.

    :returns: the search result, or an ``HTTPServiceUnavailable``
        response when indexing is inactive.
    :raises HTTPUnauthorized: when the discussion does not exist, or the
        user lacks READ permission on the discussion, on an idea hit or
        on a user hit.
    """
    if not indexing_active():
        return HTTPServiceUnavailable("Indexing inactive")

    query = request.json_body
    # Expected shape:
    # u'query': {u'bool': {u'filter': [{u'term': {u'discussion_id': u'23'}}]}}
    filters = list(query['query']['bool']['filter'])
    filtered_discussion_ids = []
    for clause in filters:
        clause_body = clause.values()[0]
        if 'discussion_id' in clause_body.keys():
            filtered_discussion_ids.append(clause_body.values()[0])
    # Only the first discussion_id filter is used for the access check.
    discussion_id = filtered_discussion_ids[0]

    discussion = models.Discussion.get_instance(discussion_id)
    if discussion is None:
        raise HTTPUnauthorized()

    user_id = request.authenticated_userid or Everyone
    permissions = get_permissions(user_id, discussion_id)
    if not discussion.user_can(user_id, CrudPermissions.READ, permissions):
        raise HTTPUnauthorized()

    es = connect()
    index_name = get_index_settings(config)['index_name']
    # Debugging aid: print get_curl_query(query)
    result = es.search(index=index_name, body=query)
    hits = result['hits']['hits']

    # add creator_name in each hit: fetch all creator names in one query
    creator_ids = set()
    for hit in hits:
        hit_creator_id = hit['_source'].get('creator_id', None)
        if hit_creator_id is not None:
            creator_ids.add(hit_creator_id)

    session = get_session_maker()
    creator_rows = session.query(
        models.AgentProfile.id,
        models.AgentProfile.name,
    ).filter(models.AgentProfile.id.in_(creator_ids)).all()
    creators_by_id = dict(creator_rows)

    for hit in hits:
        source = hit['_source']
        creator_id = source.get('creator_id', None)

        # Remove inner_hits key to not leak posts from private discussion.
        # You can easily craft a query to get the participants of a public
        # discussion and do a has_child filter with inner_hits on a
        # private discussion.
        hit.pop('inner_hits', None)

        if creator_id is not None:
            source['creator_name'] = creators_by_id.get(creator_id)

        hit_type = hit['_type']
        if hit_type == 'idea':
            idea = models.Idea.get_instance(source['id'])
            # The check is not really necessary because it's the same
            # 'read' permission as the discussion, but it doesn't cost
            # anything to check it and the READ permission may change in
            # the future.
            if not idea.user_can(user_id, CrudPermissions.READ, permissions):
                raise HTTPUnauthorized()
            source['num_posts'] = idea.num_posts
            source['num_contributors'] = idea.num_contributors
        elif hit_type == 'user':
            agent_profile = models.AgentProfile.get_instance(source['id'])
            if not agent_profile.user_can(
                    user_id, CrudPermissions.READ, permissions):
                raise HTTPUnauthorized()
            source['num_posts'] = agent_profile.count_posts_in_discussion(
                discussion_id)
        # 'synthesis' and 'post' hits are deliberately not checked one by
        # one: that would take an extra request per hit, and they
        # currently use the same 'read' permission as the discussion.

    return result
def join_transaction(event):
    """Call ``_join()`` on the object from ``get_changes()`` when indexing is active.

    :param event: accepted for the caller's sake; not used here.
    """
    if not indexing_active():
        return
    pending_changes = get_changes()
    pending_changes._join()