def reindex_all_contents(session, delete=True): if delete: settings = get_index_settings(config) index_name = settings['index_name'] delete_index(index_name) create_index_and_mapping(index_name) batch_reindex_elasticsearch(session)
def create_index(index_name): """Create the index and return connection. """ es = connect() settings = get_index_settings(config)['index_settings'] exists = es.indices.exists(index_name) if not exists: es.indices.create(index=index_name, body={'settings': settings}) return es
def check_analysis_settings(index_name): es = connect() settings = get_index_settings(config)['index_settings'] try: current = es.indices.get_settings(index_name) current = current[index_name]['settings']['index'] return compare_dicts_ref( stringify_dict(settings), current, # this setting does not stick for some reason lambda k: k != 'split_on_numerics') except (TransportError, KeyError): return False
def create_index(index_name): """Create the index and return connection. """ es = connect() exists = es.indices.exists(index_name) if exists: valid_analysis_settings = check_analysis_settings(index_name) if not valid_analysis_settings: try: push_analysis_settings(index_name) assert check_analysis_settings(index_name) except Exception as e: print(e) # cannot push settings on amazon delete_index(index_name) exists = False if not exists: settings = get_index_settings(config)['index_settings'] es.indices.create(index_name, {'settings': settings}) return es
def search_endpoint(context, request): if not indexing_active(): return HTTPServiceUnavailable("Indexing inactive") query = request.json_body # u'query': {u'bool': {u'filter': [{u'term': {u'discussion_id': u'23'}}]}} filters = [fil for fil in query['query']['bool']['filter']] discussion_id = [ f.values()[0].values()[0] for f in filters if 'discussion_id' in f.values()[0].keys() ][0] discussion = models.Discussion.get_instance(discussion_id) if discussion is None: raise HTTPUnauthorized() user_id = authenticated_userid(request) or Everyone permissions = get_permissions(user_id, discussion_id) if not discussion.user_can(user_id, CrudPermissions.READ, permissions): raise HTTPUnauthorized() es = connect() index_name = get_index_settings()['index_name'] # print get_curl_query(query) result = es.search(index=index_name, body=query) # add creator_name in each hit creator_ids = set([ hit['_source']['creator_id'] for hit in result['hits']['hits'] if hit['_source'].get('creator_id', None) is not None ]) session = get_session_maker() creators = session.query( models.AgentProfile.id, models.AgentProfile.name).filter( models.AgentProfile.id.in_(creator_ids)).all() creators_by_id = dict(creators) for hit in result['hits']['hits']: source = hit['_source'] creator_id = source.get('creator_id', None) # Remove inner_hits key to not leak posts from private discussion. # You can easily craft a query to get the participants of a public # discussion and do a has_child filter with inner_hits on a private discussion. if 'inner_hits' in hit: del hit['inner_hits'] if creator_id is not None: source['creator_name'] = creators_by_id.get(creator_id) if hit['_type'] == 'idea': idea = models.Idea.get_instance(source['id']) # The check is not really necessary because it's the same # 'read' permission as the discussion, but it doesn't cost anything # to check it and the READ permission may change in the future. if not idea.user_can(user_id, CrudPermissions.READ, permissions): raise HTTPUnauthorized source['num_posts'] = idea.num_posts source['num_contributors'] = idea.num_contributors elif hit['_type'] == 'user': agent_profile = models.AgentProfile.get_instance(source['id']) if not agent_profile.user_can(user_id, CrudPermissions.READ, permissions): raise HTTPUnauthorized source['num_posts'] = agent_profile.count_posts_in_discussion( discussion_id) # Don't do an extra request to verify the CrudPermissions.READ permission # for post or synthesis. # It's currently the same 'read' permission as the discussion. # elif hit['_type'] in ('synthesis', 'post'): # post = models.Post.get_instance(source['id']) # if not post.user_can(user_id, CrudPermissions.READ, permissions): # raise HTTPUnauthorized return result
def search_endpoint(context, request): if not indexing_active(): return HTTPServiceUnavailable("Indexing inactive") query = request.json_body # u'query': {u'bool': {u'filter': [{u'term': {u'discussion_id': u'23'}}]}} filters = [fil for fil in query['query']['bool']['filter']] discussion_id = [f.values()[0].values()[0] for f in filters if 'discussion_id' in f.values()[0].keys()][0] discussion = models.Discussion.get_instance(discussion_id) if discussion is None: raise HTTPUnauthorized() user_id = request.authenticated_userid or Everyone permissions = get_permissions(user_id, discussion_id) if not discussion.user_can(user_id, CrudPermissions.READ, permissions): raise HTTPUnauthorized() es = connect() index_name = get_index_settings(config)['index_name'] # print get_curl_query(query) result = es.search(index=index_name, body=query) # add creator_name in each hit creator_ids = set([hit['_source']['creator_id'] for hit in result['hits']['hits'] if hit['_source'].get('creator_id', None) is not None]) session = get_session_maker() creators = session.query(models.AgentProfile.id, models.AgentProfile.name ).filter(models.AgentProfile.id.in_(creator_ids)).all() creators_by_id = dict(creators) for hit in result['hits']['hits']: source = hit['_source'] creator_id = source.get('creator_id', None) # Remove inner_hits key to not leak posts from private discussion. # You can easily craft a query to get the participants of a public # discussion and do a has_child filter with inner_hits on a private discussion. if 'inner_hits' in hit: del hit['inner_hits'] if creator_id is not None: source['creator_name'] = creators_by_id.get(creator_id) if hit['_type'] == 'idea': idea = models.Idea.get_instance(source['id']) # The check is not really necessary because it's the same # 'read' permission as the discussion, but it doesn't cost anything # to check it and the READ permission may change in the future. if not idea.user_can(user_id, CrudPermissions.READ, permissions): raise HTTPUnauthorized source['num_posts'] = idea.num_posts source['num_contributors'] = idea.num_contributors elif hit['_type'] == 'user': agent_profile = models.AgentProfile.get_instance(source['id']) if not agent_profile.user_can(user_id, CrudPermissions.READ, permissions): raise HTTPUnauthorized source['num_posts'] = agent_profile.count_posts_in_discussion(discussion_id) # Don't do an extra request to verify the CrudPermissions.READ permission # for post or synthesis. # It's currently the same 'read' permission as the discussion. # elif hit['_type'] in ('synthesis', 'post'): # post = models.Post.get_instance(source['id']) # if not post.user_can(user_id, CrudPermissions.READ, permissions): # raise HTTPUnauthorized return result
def push_analysis_settings(index_name): es = connect() settings = get_index_settings(config)['index_settings'] es.indices.close(index_name) es.indices.put_settings({'analysis': settings['analysis']}, index_name) es.indices.open(index_name)