def es_index(mock_elastic_index):
    """Yield a freshly provisioned search index, then delete it.

    A timestamped index is created and aliased, and the project, page and
    section mappings are installed before the ``Index`` object is yielded.
    When the generator is resumed, the index is deleted again by name.

    ``mock_elastic_index`` is not used in the body; presumably it is only
    requested so that it is set up first -- confirm against its definition.
    """
    search_index = Index()
    name = search_index.timestamped_index()
    search_index.create_index(name)
    search_index.update_aliases(name)

    # Install the mappings in the same order as before: project, page, section.
    for mapping_cls in (ProjectIndex, PageIndex, SectionIndex):
        mapping_cls().put_mapping()

    yield search_index

    search_index.delete_index(index_name=name)
def project_search(request):
    """Search projects by name and description.

    Reads the ``q`` GET parameter from ``request``. When it is absent, a
    400 response carrying an ``error`` entry is returned. Otherwise the
    query is matched against the ``name`` field (boosted by 10) and the
    ``description`` field, scored by the ``weight`` field, and whatever
    ``ProjectIndex().search`` returns is wrapped as ``{'results': ...}``.
    """
    query = request.GET.get('q')
    if query is None:
        # NOTE(review): only ``q`` is read here although the message also
        # mentions a project; the text is kept unchanged for API clients.
        return Response(
            {'error': 'Need project and q'},
            status=status.HTTP_400_BAD_REQUEST,
        )

    # Hand the query to the logger as an argument so the message is only
    # interpolated when debug logging is actually enabled.
    log.debug("(API Project Search) %s", query)

    body = {
        "query": {
            "function_score": {
                "field_value_factor": {"field": "weight"},
                "query": {
                    "bool": {
                        "should": [
                            {"match": {"name": {"query": query, "boost": 10}}},
                            {"match": {"description": {"query": query}}},
                        ]
                    }
                }
            }
        },
        "fields": ["name", "slug", "description", "lang"]
    }
    results = ProjectIndex().search(body)
    return Response({'results': results})
def handle(self, *args, **options):
    """Provision a new ES instance.

    Creates a timestamped index, points the aliases at it and installs the
    project, page and section mappings, logging progress along the way.
    ``args`` and ``options`` are accepted but not read.
    """
    es_index = Index()
    new_index_name = es_index.timestamped_index()

    log.info("Creating indexes..")
    es_index.create_index(new_index_name)
    es_index.update_aliases(new_index_name)

    log.info("Updating mappings..")
    # Same order as before: project, then page, then section.
    for mapping_cls in (ProjectIndex, PageIndex, SectionIndex):
        mapping_cls().put_mapping()

    log.info("Done!")
def index_search_request(version, page_list, commit, project_scale,
                         page_scale, section=True, delete=True):
    """
    Push the JSON output of a build into the search indexes.

    The project document is indexed first, followed by one document per
    entry of ``page_list`` and, when ``section`` is true, one document per
    section of each page. Every bulk request is issued once per routing
    value: the project's own slug plus the slug of each parent project, so
    that sub-projects are indexed on the same shard as their parent.

    When ``delete`` is true, a delete request is finally issued for page
    documents of this project and version whose commit is not ``commit``.
    """
    # TODO refactor this function
    # pylint: disable=too-many-locals
    project = version.project

    def _doc_id(*parts):
        # Document ids are the MD5 hex digest of the dash-joined parts.
        return hashlib.md5('-'.join(parts).encode('utf-8')).hexdigest()

    joined_paths = ' '.join(entry['path'] for entry in page_list)
    log.info("Updating search index: project=%s pages=[%s]",
             project.slug, joined_paths)

    project_index = ProjectIndex()
    project_document = {
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        'weight': project_scale,
    }
    project_index.index_document(data=project_document)

    page_index = PageIndex()
    section_index = SectionIndex()
    page_documents = []
    section_documents = []

    # The project's own slug comes first, then one slug per parent project.
    routes = [project.slug]
    routes += [rel.parent.slug for rel in project.superprojects.all()]

    for page in page_list:
        path = page['path']
        log.debug("Indexing page: %s:%s", project.slug, path)
        page_id = _doc_id(project.slug, version.slug, path)
        page_documents.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': path,
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })

        if not section:
            continue

        for sect in page['sections']:
            section_documents.append({
                'id': _doc_id(project.slug, version.slug, path, sect['id']),
                'project': project.slug,
                'version': version.slug,
                'path': path,
                'page_id': sect['id'],
                'title': sect['title'],
                'content': sect['content'],
                'weight': page_scale,
            })

        # NOTE(review): section_documents is not reset per page, so each
        # request also carries sections collected from earlier pages --
        # confirm whether that is intended.
        for route in routes:
            section_index.bulk_index(
                section_documents, parent=page_id, routing=route)

    for route in routes:
        page_index.bulk_index(
            page_documents, parent=project.slug, routing=route)

    if not delete:
        return

    log.info("Deleting files not in commit: %s", commit)
    # TODO: AK Make sure this works
    stale_pages_query = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"project": project.slug}},
                    {"term": {"version": version.slug}},
                ],
                "must_not": {"term": {"commit": commit}},
            }
        }
    }
    page_index.delete_document(body=stale_pages_query)
def index_search_request(version, page_list, commit, project_scale,
                         page_scale, section=True, delete=True):
    """Update search indexes with build output JSON.

    Indexes the project document, then one document per page in
    ``page_list`` and (if ``section`` is true) one per page section.
    To keep sub-projects on the same shard as their parent, each bulk
    request is repeated with the project's slug and every parent project's
    slug as the routing value.

    If ``delete`` is true, page documents of this project and version that
    do not carry ``commit`` are removed afterwards.
    """
    # TODO refactor this function
    # pylint: disable=too-many-locals
    project = version.project
    project_slug = project.slug
    version_slug = version.slug

    paths_for_log = ' '.join([item['path'] for item in page_list])
    log.info("Updating search index: project=%s pages=[%s]",
             project_slug, paths_for_log)

    projects = ProjectIndex()
    projects.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project_slug,
        'description': project.description,
        'lang': project.language,
        'author': [member.username for member in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        'weight': project_scale,
    })

    pages = PageIndex()
    sections = SectionIndex()
    page_docs = []
    section_docs = []
    routing_values = [project_slug]
    routing_values.extend(
        link.parent.slug for link in project.superprojects.all())

    for page in page_list:
        log.debug("Indexing page: %s:%s", project_slug, page['path'])
        page_key = '-'.join([project_slug, version_slug, page['path']])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        page_docs.append({
            'id': page_id,
            'project': project_slug,
            'version': version_slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })

        if section:
            # One document per section; its id hashes the page key plus the
            # section's own id.
            section_docs.extend(
                {
                    'id': hashlib.md5(
                        '-'.join([page_key, sect['id']]).encode('utf-8')
                    ).hexdigest(),
                    'project': project_slug,
                    'version': version_slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                }
                for sect in page['sections']
            )
            # NOTE(review): section_docs keeps growing across pages, so
            # earlier pages' sections are sent again here -- confirm intent.
            for routing_value in routing_values:
                sections.bulk_index(
                    section_docs, parent=page_id, routing=routing_value)

    for routing_value in routing_values:
        pages.bulk_index(
            page_docs, parent=project_slug, routing=routing_value)

    if delete:
        log.info("Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        same_build_terms = [
            {"term": {field: value}}
            for field, value in (
                ("project", project_slug),
                ("version", version_slug),
            )
        ]
        pages.delete_document(body={
            "query": {
                "bool": {
                    "must": same_build_terms,
                    "must_not": {"term": {"commit": commit}},
                }
            }
        })
def index_search_request(version, page_list, commit, project_scale,
                         page_scale, section=True, delete=True):
    """Update search indexes with build output JSON.

    Indexes the project document, then one document per entry of
    ``page_list`` and, when ``section`` is true, one document per section
    of each page. Section requests are routed by the project's slug.

    When ``delete`` is true, page documents of this project and version
    whose commit is not ``commit`` are deleted afterwards.
    """
    log_msg = ' '.join([page['path'] for page in page_list])
    # Logger arguments are passed separately so interpolation is deferred
    # until the record is actually emitted.
    log.info("(Server Search) Indexing Pages: %s [%s]",
             version.project.slug, log_msg)
    project = version.project

    page_obj = PageIndex()
    section_obj = SectionIndex()

    # tags = [tag.name for tag in project.tags.all()]
    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        'weight': project_scale,
    })

    index_list = []
    section_index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        # hashlib only accepts bytes, so the key is encoded before hashing.
        to_hash = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(to_hash.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            # The loop variable is named ``sect`` so that it does not rebind
            # the ``section`` flag tested above.
            for sect in page['sections']:
                id_to_hash = '%s-%s-%s-%s' % (
                    project.slug, version.slug, page['path'], sect['id'])
                section_index_list.append({
                    'id': hashlib.md5(id_to_hash.encode('utf-8')).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
            # NOTE(review): section_index_list is not reset per page, so
            # this request also carries earlier pages' sections -- confirm
            # whether that is intended.
            section_obj.bulk_index(section_index_list, parent=page_id,
                                   routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug}},
                        {"term": {"version": version.slug}},
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
def index_search_request(version, page_list, commit, project_scale,
                         page_scale, section=True, delete=True):
    """Update search indexes with build output JSON.

    The project document is indexed first. Then every entry of
    ``page_list`` becomes a page document and, when ``section`` is true,
    each of its sections becomes a section document routed by the
    project's slug.

    When ``delete`` is true, a delete request is issued for page documents
    of this project and version whose commit is not ``commit``.
    """
    log_msg = ' '.join([page['path'] for page in page_list])
    # Arguments go to the logger unformatted; it interpolates them lazily.
    log.info("(Server Search) Indexing Pages: %s [%s]",
             version.project.slug, log_msg)
    project = version.project

    page_obj = PageIndex()
    section_obj = SectionIndex()

    # tags = [tag.name for tag in project.tags.all()]
    project_obj = ProjectIndex()
    project_obj.index_document(
        data={
            'id': project.pk,
            'name': project.name,
            'slug': project.slug,
            'description': project.description,
            'lang': project.language,
            'author': [user.username for user in project.users.all()],
            'url': project.get_absolute_url(),
            'tags': None,
            'weight': project_scale,
        })

    index_list = []
    section_index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        # Encode to UTF-8 first: hashlib rejects text that is not bytes.
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            # ``sect`` rather than ``section``: reusing the parameter name
            # as the loop variable would overwrite the flag checked above.
            for sect in page['sections']:
                section_key = '%s-%s-%s-%s' % (
                    project.slug, version.slug, page['path'], sect['id'])
                section_index_list.append({
                    'id': hashlib.md5(
                        section_key.encode('utf-8')).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
            # NOTE(review): the section list accumulates across pages, so
            # earlier pages' sections are re-sent with this page as parent
            # -- confirm whether that is intended.
            section_obj.bulk_index(
                section_index_list, parent=page_id, routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {
                            "term": {
                                "project": project.slug,
                            }
                        },
                        {
                            "term": {
                                "version": version.slug,
                            }
                        },
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
from readthedocs.search.indexes import (
    Index,
    PageIndex,
    ProjectIndex,
    SectionIndex,
)

# Provision a new timestamped index and point the aliases at it.
index = Index()
index_name = index.timestamped_index()
index.create_index(index_name)
index.update_aliases(index_name)

# Install the mapping of each document type: project, page, then section.
proj = ProjectIndex()
proj.put_mapping()

page = PageIndex()
page.put_mapping()

sec = SectionIndex()
sec.put_mapping()