def es_index(mock_elastic_index):
    """Fixture: provision a timestamped ES index with all document mappings.

    Yields the live :class:`Index` wrapper; deletes the index on teardown.
    """
    es = Index()
    name = es.timestamped_index()
    es.create_index(name)
    es.update_aliases(name)
    # Install the mapping for every document type we index.
    for mapping_cls in (ProjectIndex, PageIndex, SectionIndex):
        mapping_cls().put_mapping()
    yield es
    es.delete_index(index_name=name)
def handle(self, *args, **options):
    """Provision new ES instance"""
    es = Index()
    name = es.timestamped_index()
    log.info("Creating indexes..")
    es.create_index(name)
    es.update_aliases(name)
    log.info("Updating mappings..")
    # One put_mapping() per document type.
    for index_cls in (ProjectIndex, PageIndex, SectionIndex):
        index_cls().put_mapping()
    log.info("Done!")
def search(request):
    """API view: full-text search over indexed documentation pages.

    Query parameters:
        ``project`` -- project slug (required; 400 if missing).
        ``q``       -- search query (required; 400 if missing).
        ``version`` -- version slug, defaults to ``LATEST``.

    Returns the raw Elasticsearch response under ``results``.
    """
    project_slug = request.GET.get('project', None)
    version_slug = request.GET.get('version', LATEST)
    query = request.GET.get('q', None)
    if project_slug is None or query is None:
        return Response({'error': 'Need project and q'},
                        status=status.HTTP_400_BAD_REQUEST)
    log.debug("(API Search) %s" % query)
    kwargs = {}
    # Relevance: field_value_factor multiplies the text score by the
    # document's stored "weight" field; title matches are boosted over
    # headers, which are boosted over body content.
    body = {
        "query": {
            "function_score": {
                "field_value_factor": {"field": "weight"},
                "query": {
                    "bool": {
                        "should": [
                            {"match": {"title": {"query": query, "boost": 10}}},
                            {"match": {"headers": {"query": query, "boost": 5}}},
                            {"match": {"content": {"query": query}}},
                        ]
                    }
                }
            }
        },
        "highlight": {
            "fields": {
                "title": {},
                "headers": {},
                "content": {},
            }
        },
        "fields": ["title", "project", "version", "path"],
        "size": 50  # TODO: Support pagination.
    }
    # project_slug is always truthy here (None was rejected above), so the
    # filter and routing are effectively always applied.
    if project_slug:
        body['filter'] = {
            "and": [
                {"term": {"project": project_slug}},
                {"term": {"version": version_slug}},
            ]
        }
        # Add routing to optimize search by hitting the right shard.
        kwargs['routing'] = project_slug
    results = PageIndex().search(body, **kwargs)
    return Response({'results': results})
def elastic_project_search(request, project_slug):
    """Use elastic search to search in a project."""
    queryset = Project.objects.protected(request.user)
    project = get_object_or_404(queryset, slug=project_slug)
    version_slug = request.GET.get('version', LATEST)
    query = request.GET.get('q', None)
    if query:
        user = ''
        # NOTE(review): is_authenticated is called as a method here, which
        # only works on older Django versions (it became a property later).
        if request.user.is_authenticated():
            user = request.user
        log.info(
            LOG_TEMPLATE.format(
                user=user,
                project=project or '',
                type='inproject',
                version=version_slug or '',
                language='',
                msg=query or '',
            ))

    if query:
        kwargs = {}
        # Title matches outrank header matches, which outrank body content.
        body = {
            'query': {
                'bool': {
                    'should': [
                        {
                            'match': {
                                'title': {
                                    'query': query,
                                    'boost': 10
                                }
                            }
                        },
                        {
                            'match': {
                                'headers': {
                                    'query': query,
                                    'boost': 5
                                }
                            }
                        },
                        {
                            'match': {
                                'content': {
                                    'query': query
                                }
                            }
                        },
                    ]
                }
            },
            'highlight': {
                'fields': {
                    'title': {},
                    'headers': {},
                    'content': {},
                }
            },
            'fields': ['title', 'project', 'version', 'path'],
            # Restrict hits to this project/version only.
            'filter': {
                'and': [
                    {
                        'term': {
                            'project': project_slug
                        }
                    },
                    {
                        'term': {
                            'version': version_slug
                        }
                    },
                ]
            },
            'size': 50,  # TODO: Support pagination.
        }

        # Add routing to optimize search by hitting the right shard.
        kwargs['routing'] = project_slug

        results = PageIndex().search(body, **kwargs)
    else:
        results = {}

    if results:
        # pre and post 1.0 compat: unwrap single-element field lists so the
        # template always sees scalar values.
        for num, hit in enumerate(results['hits']['hits']):
            for key, val in list(hit['fields'].items()):
                if isinstance(val, list):
                    results['hits']['hits'][num]['fields'][key] = val[0]

    return render(
        request,
        'search/elastic_project_search.html',
        {
            'project': project,
            'query': query,
            'results': results,
        },
    )
def index_search_request(version, page_list, commit, project_scale, page_scale,
                         section=True, delete=True):
    """
    Update search indexes with build output JSON.

    In order to keep sub-projects all indexed on the same shard, indexes
    will be updated using the parent project's slug as the routing value.
    """
    # TODO refactor this function
    # pylint: disable=too-many-locals
    project = version.project

    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("Updating search index: project=%s pages=[%s]",
             project.slug, log_msg)

    # Index (or reindex) the project document itself.
    project_obj = ProjectIndex()
    project_obj.index_document(
        data={
            'id': project.pk,
            'name': project.name,
            'slug': project.slug,
            'description': project.description,
            'lang': project.language,
            'author': [user.username for user in project.users.all()],
            'url': project.get_absolute_url(),
            'tags': None,
            'weight': project_scale,
        })

    page_obj = PageIndex()
    section_obj = SectionIndex()
    index_list = []
    section_index_list = []
    # Route to the project's own shard plus each superproject's shard.
    routes = [project.slug]
    routes.extend([p.parent.slug for p in project.superprojects.all()])
    for page in page_list:
        log.debug("Indexing page: %s:%s", project.slug, page['path'])
        # Deterministic document id derived from project/version/path.
        to_hash = '-'.join([project.slug, version.slug, page['path']])
        page_id = hashlib.md5(to_hash.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            for sect in page['sections']:
                id_to_hash = '-'.join(
                    [project.slug, version.slug, page['path'], sect['id']])
                section_index_list.append({
                    'id': (hashlib.md5(id_to_hash.encode('utf-8')).hexdigest()),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
            # NOTE(review): section_index_list accumulates across pages but is
            # bulk-indexed here on every page iteration, re-submitting earlier
            # pages' sections under the current page_id parent — looks
            # quadratic and possibly mis-parented; confirm intent.
            for route in routes:
                section_obj.bulk_index(section_index_list, parent=page_id,
                                       routing=route)

    for route in routes:
        page_obj.bulk_index(index_list, parent=project.slug, routing=route)

    if delete:
        log.info("Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        # Remove this project/version's page documents from any commit other
        # than the one just indexed.
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {
                            "term": {
                                "project": project.slug,
                            }
                        },
                        {
                            "term": {
                                "version": version.slug,
                            }
                        },
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
def elastic_project_search(request, project_slug):
    """Use elastic search to search in a project"""
    queryset = Project.objects.protected(request.user)
    project = get_object_or_404(queryset, slug=project_slug)
    version_slug = request.GET.get('version', LATEST)
    query = request.GET.get('q', None)
    if query:
        user = ''
        # NOTE(review): is_authenticated is called as a method, which only
        # works on older Django versions (it became a property later).
        if request.user.is_authenticated():
            user = request.user
        log.info(LOG_TEMPLATE.format(
            user=user,
            project=project or '',
            type='inproject',
            version=version_slug or '',
            language='',
            msg=query or '',
        ))

    if query:
        kwargs = {}
        # Title matches outrank header matches, which outrank body content.
        body = {
            "query": {
                "bool": {
                    "should": [
                        {"match": {"title": {"query": query, "boost": 10}}},
                        {"match": {"headers": {"query": query, "boost": 5}}},
                        {"match": {"content": {"query": query}}},
                    ]
                }
            },
            "highlight": {
                "fields": {
                    "title": {},
                    "headers": {},
                    "content": {},
                }
            },
            "fields": ["title", "project", "version", "path"],
            # Restrict hits to this project/version only.
            "filter": {
                "and": [
                    {"term": {"project": project_slug}},
                    {"term": {"version": version_slug}},
                ]
            },
            "size": 50  # TODO: Support pagination.
        }

        # Add routing to optimize search by hitting the right shard.
        kwargs['routing'] = project_slug

        results = PageIndex().search(body, **kwargs)
    else:
        results = {}

    if results:
        # pre and post 1.0 compat: unwrap single-element field lists so the
        # template always sees scalar values.
        for num, hit in enumerate(results['hits']['hits']):
            for key, val in hit['fields'].items():
                if isinstance(val, list):
                    results['hits']['hits'][num]['fields'][key] = val[0]

    # NOTE(review): render_to_response with context_instance is the pre-1.10
    # Django API; this is the older sibling of the `render`-based variant.
    return render_to_response(
        'search/elastic_project_search.html',
        {
            'project': project,
            'query': query,
            'results': results,
        },
        context_instance=RequestContext(request),
    )
def index_search_request(version, page_list, commit, project_scale, page_scale,
                         section=True, delete=True):
    """Update search indexes with build output JSON

    In order to keep sub-projects all indexed on the same shard, indexes
    will be updated using the parent project's slug as the routing value.
    """
    # TODO refactor this function
    # pylint: disable=too-many-locals
    project = version.project

    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("Updating search index: project=%s pages=[%s]",
             project.slug, log_msg)

    # Index (or reindex) the project document itself.
    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        'weight': project_scale,
    })

    page_obj = PageIndex()
    section_obj = SectionIndex()
    index_list = []
    section_index_list = []
    # Route to the project's own shard plus each superproject's shard.
    routes = [project.slug]
    routes.extend([p.parent.slug for p in project.superprojects.all()])
    for page in page_list:
        log.debug("Indexing page: %s:%s", project.slug, page['path'])
        # Deterministic document id derived from project/version/path.
        to_hash = '-'.join([project.slug, version.slug, page['path']])
        page_id = hashlib.md5(to_hash.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            for sect in page['sections']:
                id_to_hash = '-'.join([project.slug, version.slug,
                                       page['path'], sect['id']])
                section_index_list.append({
                    'id': (hashlib.md5(id_to_hash.encode('utf-8')).hexdigest()),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
            # NOTE(review): section_index_list accumulates across pages but is
            # bulk-indexed here on every page iteration, re-submitting earlier
            # pages' sections under the current page_id parent — looks
            # quadratic and possibly mis-parented; confirm intent.
            for route in routes:
                section_obj.bulk_index(section_index_list, parent=page_id,
                                       routing=route)

    for route in routes:
        page_obj.bulk_index(index_list, parent=project.slug, routing=route)

    if delete:
        log.info("Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        # Remove this project/version's page documents from any commit other
        # than the one just indexed.
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug, }},
                        {"term": {"version": version.slug, }},
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
def index_search_request(version, page_list, commit, project_scale, page_scale,
                         section=True, delete=True):
    """Update search indexes with build output JSON.

    Indexes the project document, every page in ``page_list`` and,
    when ``section`` is true, each page's sections. When ``delete`` is
    true, page documents for this project/version from any other commit
    are removed afterwards.

    :param version: project version whose pages are being indexed
    :param page_list: list of page dicts (``path``, ``title``, ``headers``,
        ``content``, ``sections``)
    :param commit: commit identifier stored on each page document
    :param project_scale: base relevance weight for the project
    :param page_scale: additional relevance weight for pages
    :param section: also index per-page sections (default True)
    :param delete: purge documents from other commits (default True)
    """
    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]" % (
        version.project.slug, log_msg))
    project = version.project
    page_obj = PageIndex()
    section_obj = SectionIndex()
    # tags = [tag.name for tag in project.tags.all()]
    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        'weight': project_scale,
    })
    index_list = []
    section_index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page['path']))
        # Fix: md5 requires bytes on Python 3 — encode before hashing,
        # matching the newer variants of this function.
        to_hash = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(to_hash.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            # Fix: loop variable renamed from ``section`` to ``sect`` so it
            # no longer shadows (and clobbers) the ``section`` parameter.
            for sect in page['sections']:
                id_to_hash = '%s-%s-%s-%s' % (project.slug, version.slug,
                                              page['path'], sect['id'])
                section_index_list.append({
                    'id': hashlib.md5(id_to_hash.encode('utf-8')).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
        section_obj.bulk_index(section_index_list, parent=page_id,
                               routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s" % commit)
        # TODO: AK Make sure this works
        # Remove this project/version's page documents from any commit other
        # than the one just indexed.
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug, }},
                        {"term": {"version": version.slug, }},
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
def index_search_request(version, page_list, commit, project_scale, page_scale,
                         section=True, delete=True):
    """Update search indexes with build output JSON.

    Indexes the project document, every page in ``page_list`` and,
    when ``section`` is true, each page's sections. When ``delete`` is
    true, page documents for this project/version from any other commit
    are removed afterwards.

    :param version: project version whose pages are being indexed
    :param page_list: list of page dicts (``path``, ``title``, ``headers``,
        ``content``, ``sections``)
    :param commit: commit identifier stored on each page document
    :param project_scale: base relevance weight for the project
    :param page_scale: additional relevance weight for pages
    :param section: also index per-page sections (default True)
    :param delete: purge documents from other commits (default True)
    """
    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]" % (version.project.slug,
                                                          log_msg))
    project = version.project
    page_obj = PageIndex()
    section_obj = SectionIndex()
    # tags = [tag.name for tag in project.tags.all()]
    project_obj = ProjectIndex()
    project_obj.index_document(
        data={
            'id': project.pk,
            'name': project.name,
            'slug': project.slug,
            'description': project.description,
            'lang': project.language,
            'author': [user.username for user in project.users.all()],
            'url': project.get_absolute_url(),
            'tags': None,
            'weight': project_scale,
        })
    index_list = []
    section_index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page['path']))
        # Fix: md5 requires bytes on Python 3 — encode before hashing,
        # matching the newer variants of this function.
        page_id = hashlib.md5(
            ('%s-%s-%s' % (project.slug, version.slug,
                           page['path'])).encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            # Fix: loop variable renamed from ``section`` to ``sect`` so it
            # no longer shadows (and clobbers) the ``section`` parameter.
            for sect in page['sections']:
                section_index_list.append({
                    'id': hashlib.md5(
                        ('%s-%s-%s-%s' % (project.slug,
                                          version.slug,
                                          page['path'],
                                          sect['id'])).encode('utf-8')
                    ).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
        section_obj.bulk_index(section_index_list, parent=page_id,
                               routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s" % commit)
        # TODO: AK Make sure this works
        # Remove this project/version's page documents from any commit other
        # than the one just indexed.
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {
                            "term": {
                                "project": project.slug,
                            }
                        },
                        {
                            "term": {
                                "version": version.slug,
                            }
                        },
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
from readthedocs.search.indexes import Index, PageIndex, ProjectIndex, SectionIndex # Create the index. index = Index() index_name = index.timestamped_index() index.create_index(index_name) index.update_aliases(index_name) # Update mapping proj = ProjectIndex() proj.put_mapping() page = PageIndex() page.put_mapping() sec = SectionIndex() sec.put_mapping()