def es_index(mock_elastic_index):
    """
    Yield a freshly provisioned search index and drop it afterwards.

    A timestamped index is created and the aliases are pointed at it, then the
    project, page and section mappings are pushed. The ``Index`` wrapper is
    yielded to the consumer; when the generator is resumed the timestamped
    index is deleted again.

    ``mock_elastic_index`` is not referenced in the body -- presumably it is
    requested only so that it is set up first (TODO confirm against its
    definition).
    """
    search_index = Index()
    timestamped_name = search_index.timestamped_index()
    search_index.create_index(timestamped_name)
    search_index.update_aliases(timestamped_name)

    # Mappings are pushed in a fixed order: project, page, section.
    for mapping_cls in (ProjectIndex, PageIndex, SectionIndex):
        mapping_cls().put_mapping()

    yield search_index

    # Teardown: remove the index created above.
    search_index.delete_index(index_name=timestamped_name)
def es_index(mock_elastic_index):
    """
    Provision a throw-away search index, hand it out, then delete it.

    Setup creates a timestamped index, updates the aliases to it and puts the
    project, page and section mappings. The ``Index`` instance is yielded;
    after the consumer is done the timestamped index is deleted.

    ``mock_elastic_index`` is accepted but never used inside the body.
    """
    es = Index()
    name = es.timestamped_index()

    # Create the index and alias it.
    es.create_index(name)
    es.update_aliases(name)

    # Put each mapping in turn (project first, section last).
    for index_type in (ProjectIndex, PageIndex, SectionIndex):
        index_type().put_mapping()

    yield es

    es.delete_index(index_name=name)
    def handle(self, *args, **options):
        """
        Provision a new ES instance.

        Creates a timestamped index, points the aliases at it and puts the
        project, page and section mappings, logging progress along the way.
        ``args`` and ``options`` are accepted but not used.
        """
        es_index = Index()
        new_index_name = es_index.timestamped_index()

        log.info("Creating indexes..")
        es_index.create_index(new_index_name)
        es_index.update_aliases(new_index_name)

        log.info("Updating mappings..")
        # Same order as the index hierarchy is declared: project, page, section.
        for index_cls in (ProjectIndex, PageIndex, SectionIndex):
            index_cls().put_mapping()
        log.info("Done!")
Exemple #4
0
def search(request):
    """
    Search the pages of one project/version and return the raw results.

    Query-string parameters read from ``request.GET``:

    * ``project`` -- project slug (required)
    * ``version`` -- version slug, defaults to ``LATEST``
    * ``q`` -- the search text (required)

    Returns a 400 response with an ``error`` key when ``project`` or ``q`` is
    missing *or empty*; otherwise a response whose ``results`` key holds
    whatever ``PageIndex().search`` returned.
    """
    project_slug = request.GET.get('project')
    version_slug = request.GET.get('version', LATEST)
    query = request.GET.get('q')
    # An empty value is as unusable as a missing one: an empty project slug
    # would otherwise skip the project/version filter and search every
    # project without routing.
    if not project_slug or not query:
        return Response({'error': 'Need project and q'}, status=status.HTTP_400_BAD_REQUEST)
    # Let the logging framework interpolate, so nothing is formatted when
    # debug logging is disabled.
    log.debug("(API Search) %s", query)

    body = {
        "query": {
            "function_score": {
                # Scale the text score by the document's stored ``weight``.
                "field_value_factor": {"field": "weight"},
                "query": {
                    "bool": {
                        "should": [
                            {"match": {
                                "title": {"query": query, "boost": 10}}},
                            {"match": {
                                "headers": {"query": query, "boost": 5}}},
                            {"match": {"content": {"query": query}}},
                        ]
                    }
                }
            }
        },
        "highlight": {
            "fields": {
                "title": {},
                "headers": {},
                "content": {},
            }
        },
        "fields": ["title", "project", "version", "path"],
        "size": 50,  # TODO: Support pagination.
        # Restrict hits to the requested project and version.
        "filter": {
            "and": [
                {"term": {"project": project_slug}},
                {"term": {"version": version_slug}},
            ]
        },
    }

    # Add routing to optimize search by hitting the right shard.
    results = PageIndex().search(body, routing=project_slug)

    return Response({'results': results})
Exemple #5
0
def elastic_project_search(request, project_slug):
    """
    Search within a single project and render the result page.

    The project is looked up among those ``Project.objects.protected`` returns
    for the requesting user (404 otherwise). ``version`` (default ``LATEST``)
    and ``q`` are read from the query string. Without a query no search is
    performed and an empty result dict is rendered.
    """
    visible_projects = Project.objects.protected(request.user)
    project = get_object_or_404(visible_projects, slug=project_slug)
    version_slug = request.GET.get('version', LATEST)
    query = request.GET.get('q', None)

    results = {}
    if query:
        # Log who searched for what; anonymous users are logged as ''.
        user = request.user if request.user.is_authenticated() else ''
        log.info(
            LOG_TEMPLATE.format(
                user=user,
                project=project or '',
                type='inproject',
                version=version_slug or '',
                language='',
                msg=query or '',
            ))

        # Title matches count most, then headers, then body content.
        weighted_matches = [
            {'match': {'title': {'query': query, 'boost': 10}}},
            {'match': {'headers': {'query': query, 'boost': 5}}},
            {'match': {'content': {'query': query}}},
        ]
        scope_filters = [
            {'term': {'project': project_slug}},
            {'term': {'version': version_slug}},
        ]
        body = {
            'query': {'bool': {'should': weighted_matches}},
            'highlight': {
                'fields': {'title': {}, 'headers': {}, 'content': {}},
            },
            'fields': ['title', 'project', 'version', 'path'],
            'filter': {'and': scope_filters},
            'size': 50,  # TODO: Support pagination.
        }

        # Add routing to optimize search by hitting the right shard.
        results = PageIndex().search(body, routing=project_slug)

    if results:
        # pre and post 1.0 compat: collapse list-valued fields to their
        # first element.
        for hit in results['hits']['hits']:
            hit_fields = hit['fields']
            for key, val in list(hit_fields.items()):
                if isinstance(val, list):
                    hit_fields[key] = val[0]

    context = {
        'project': project,
        'query': query,
        'results': results,
    }
    return render(request, 'search/elastic_project_search.html', context)
def index_search_request(version,
                         page_list,
                         commit,
                         project_scale,
                         page_scale,
                         section=True,
                         delete=True):
    """
    Update search indexes with build output JSON.

    In order to keep sub-projects all indexed on the same shard, indexes will be
    updated using the parent project's slug as the routing value.

    :param version: object whose ``project`` and ``slug`` identify the documents
    :param page_list: page dicts providing ``path``, ``title``, ``headers`` and
        ``content`` (plus ``sections`` when ``section`` is true); it is
        iterated more than once
    :param commit: value stored on every page document and used by the
        clean-up query
    :param project_scale: weight of the project document, also added to
        ``page_scale`` for the page weight
    :param page_scale: weight of section documents
    :param section: when true, index each page's sections as well
    :param delete: when true, delete page documents of this project/version
        whose ``commit`` differs from ``commit``
    """
    # TODO refactor this function
    # pylint: disable=too-many-locals
    project = version.project

    log_msg = ' '.join(page['path'] for page in page_list)
    log.info("Updating search index: project=%s pages=[%s]", project.slug,
             log_msg)

    project_obj = ProjectIndex()
    project_obj.index_document(
        data={
            'id': project.pk,
            'name': project.name,
            'slug': project.slug,
            'description': project.description,
            'lang': project.language,
            'author': [user.username for user in project.users.all()],
            'url': project.get_absolute_url(),
            'tags': None,
            'weight': project_scale,
        })

    page_obj = PageIndex()
    section_obj = SectionIndex()
    index_list = []
    # Index under the project's own slug and under every superproject's slug.
    routes = [project.slug]
    routes.extend(p.parent.slug for p in project.superprojects.all())
    for page in page_list:
        log.debug("Indexing page: %s:%s", project.slug, page['path'])
        to_hash = '-'.join([project.slug, version.slug, page['path']])
        page_id = hashlib.md5(to_hash.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            # Start a fresh list for every page. A list shared across pages
            # would re-send earlier pages' sections with this page's id as
            # their parent, leaving every section attached to the last page.
            section_index_list = []
            for sect in page['sections']:
                id_to_hash = '-'.join(
                    [project.slug, version.slug, page['path'], sect['id']])
                section_index_list.append({
                    'id': hashlib.md5(id_to_hash.encode('utf-8')).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
            # Nothing to send for pages without sections.
            if section_index_list:
                for route in routes:
                    section_obj.bulk_index(section_index_list,
                                           parent=page_id,
                                           routing=route)

    for route in routes:
        page_obj.bulk_index(index_list, parent=project.slug, routing=route)

    if delete:
        log.info("Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        # Pages of this project/version that were not written by this commit.
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug}},
                        {"term": {"version": version.slug}},
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
Exemple #7
0
def elastic_project_search(request, project_slug):
    """
    Run an in-project search and render ``elastic_project_search.html``.

    The project must be among those ``Project.objects.protected`` returns for
    the requesting user, otherwise a 404 is raised. ``version`` (default
    ``LATEST``) and ``q`` come from the query string; when ``q`` is empty or
    absent no search is issued and ``results`` is an empty dict.
    """
    allowed = Project.objects.protected(request.user)
    project = get_object_or_404(allowed, slug=project_slug)
    version_slug = request.GET.get('version', LATEST)
    query = request.GET.get('q', None)

    results = {}
    if query:
        # Record the search; unauthenticated users are logged as ''.
        user = request.user if request.user.is_authenticated() else ''
        log.info(LOG_TEMPLATE.format(
            user=user,
            project=project or '',
            type='inproject',
            version=version_slug or '',
            language='',
            msg=query or '',
        ))

        # Boost title hits over header hits over plain content hits.
        should_clauses = [
            {"match": {"title": {"query": query, "boost": 10}}},
            {"match": {"headers": {"query": query, "boost": 5}}},
            {"match": {"content": {"query": query}}},
        ]
        highlighted = {"title": {}, "headers": {}, "content": {}}
        only_this_version = [
            {"term": {"project": project_slug}},
            {"term": {"version": version_slug}},
        ]
        body = {
            "query": {"bool": {"should": should_clauses}},
            "highlight": {"fields": highlighted},
            "fields": ["title", "project", "version", "path"],
            "filter": {"and": only_this_version},
            "size": 50  # TODO: Support pagination.
        }

        # Add routing to optimize search by hitting the right shard.
        results = PageIndex().search(body, routing=project_slug)

    if results:
        # pre and post 1.0 compat: unwrap list-valued fields.
        for hit in results['hits']['hits']:
            hit_fields = hit['fields']
            for key, val in hit_fields.items():
                if isinstance(val, list):
                    hit_fields[key] = val[0]

    template_context = {
        'project': project,
        'query': query,
        'results': results,
    }
    return render_to_response(
        'search/elastic_project_search.html',
        template_context,
        context_instance=RequestContext(request),
    )
Exemple #8
0
def index_search_request(version, page_list, commit, project_scale, page_scale,
                         section=True, delete=True):
    """Update search indexes with build output JSON

    In order to keep sub-projects all indexed on the same shard, indexes will be
    updated using the parent project's slug as the routing value.

    :param version: object whose ``project`` and ``slug`` identify the documents
    :param page_list: page dicts with ``path``, ``title``, ``headers`` and
        ``content`` (and ``sections`` when ``section`` is true); iterated
        more than once
    :param commit: stored on each page document; pages of this project/version
        with a different value are removed when ``delete`` is true
    :param project_scale: weight of the project document, also added to
        ``page_scale`` to form the page weight
    :param page_scale: weight of section documents
    :param section: index the sections of each page too
    :param delete: run the clean-up delete query after indexing
    """
    # TODO refactor this function
    # pylint: disable=too-many-locals
    project = version.project

    log_msg = ' '.join(page['path'] for page in page_list)
    log.info("Updating search index: project=%s pages=[%s]",
             project.slug, log_msg)

    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        'weight': project_scale,
    })

    page_obj = PageIndex()
    section_obj = SectionIndex()
    index_list = []
    # One routing value for the project itself plus one per superproject.
    routes = [project.slug]
    routes.extend(p.parent.slug for p in project.superprojects.all())
    for page in page_list:
        log.debug("Indexing page: %s:%s", project.slug, page['path'])
        to_hash = '-'.join([project.slug, version.slug, page['path']])
        page_id = hashlib.md5(to_hash.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            # Per-page list: sections must only ever be sent with their own
            # page as parent. Accumulating across pages would re-index older
            # sections under each later page id.
            section_index_list = []
            for sect in page['sections']:
                id_to_hash = '-'.join([project.slug, version.slug,
                                       page['path'], sect['id']])
                section_index_list.append({
                    'id': hashlib.md5(id_to_hash.encode('utf-8')).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
            # Skip the bulk call entirely when the page has no sections.
            if section_index_list:
                for route in routes:
                    section_obj.bulk_index(section_index_list, parent=page_id,
                                           routing=route)

    for route in routes:
        page_obj.bulk_index(index_list, parent=project.slug, routing=route)

    if delete:
        log.info("Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        # Match pages of this project/version left over from other commits.
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug, }},
                        {"term": {"version": version.slug, }},
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
Exemple #9
0
def index_search_request(version, page_list, commit, project_scale, page_scale,
                         section=True, delete=True):
    """
    Index a project, its pages and (optionally) their sections for search.

    :param version: object whose ``project`` and ``slug`` identify the documents
    :param page_list: page dicts with ``path``, ``title``, ``headers`` and
        ``content`` (and ``sections`` when ``section`` is true); iterated
        more than once
    :param commit: stored on each page document; pages of this project/version
        with a different value are removed when ``delete`` is true
    :param project_scale: weight of the project document, also added to
        ``page_scale`` to form the page weight
    :param page_scale: weight of section documents
    :param section: index the sections of each page too
    :param delete: run the clean-up delete query after indexing
    """
    project = version.project
    log_msg = ' '.join(page['path'] for page in page_list)
    # Pass arguments to the logger instead of pre-formatting the message.
    log.info("(Server Search) Indexing Pages: %s [%s]", project.slug, log_msg)
    page_obj = PageIndex()
    section_obj = SectionIndex()

    # tags = [tag.name for tag in project.tags.all()]

    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        'weight': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        # hashlib needs bytes, so encode the identifier explicitly.
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            # Per-page list: sections must only be sent with their own page
            # as parent, never re-sent under a later page's id.
            section_index_list = []
            # The loop variable is deliberately not called ``section`` so the
            # flag parameter keeps its value for the following pages.
            for sect in page['sections']:
                section_key = '%s-%s-%s-%s' % (project.slug, version.slug,
                                               page['path'], sect['id'])
                section_index_list.append({
                    'id': hashlib.md5(section_key.encode('utf-8')).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
            # Skip the bulk call when the page has no sections.
            if section_index_list:
                section_obj.bulk_index(section_index_list, parent=page_id,
                                       routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        # Match pages of this project/version left over from other commits.
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug, }},
                        {"term": {"version": version.slug, }},
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
Exemple #10
0
def index_search_request(version,
                         page_list,
                         commit,
                         project_scale,
                         page_scale,
                         section=True,
                         delete=True):
    """
    Index a project, its pages and (optionally) their sections for search.

    :param version: object whose ``project`` and ``slug`` identify the documents
    :param page_list: page dicts providing ``path``, ``title``, ``headers`` and
        ``content`` (plus ``sections`` when ``section`` is true); it is
        iterated more than once
    :param commit: value stored on every page document and used by the
        clean-up query
    :param project_scale: weight of the project document, also added to
        ``page_scale`` for the page weight
    :param page_scale: weight of section documents
    :param section: when true, index each page's sections as well
    :param delete: when true, delete page documents of this project/version
        whose ``commit`` differs from ``commit``
    """
    project = version.project
    log_msg = ' '.join(page['path'] for page in page_list)
    # Pass arguments to the logger instead of pre-formatting the message.
    log.info("(Server Search) Indexing Pages: %s [%s]", project.slug, log_msg)
    page_obj = PageIndex()
    section_obj = SectionIndex()

    # tags = [tag.name for tag in project.tags.all()]

    project_obj = ProjectIndex()
    project_obj.index_document(
        data={
            'id': project.pk,
            'name': project.name,
            'slug': project.slug,
            'description': project.description,
            'lang': project.language,
            'author': [user.username for user in project.users.all()],
            'url': project.get_absolute_url(),
            'tags': None,
            'weight': project_scale,
        })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        # hashlib needs bytes, so encode the identifier explicitly.
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            # Start a fresh list for every page. A list shared across pages
            # would re-send earlier pages' sections with this page's id as
            # their parent.
            section_index_list = []
            # Use a distinct loop name so the ``section`` flag is not
            # overwritten by a section dict.
            for sect in page['sections']:
                section_key = '%s-%s-%s-%s' % (project.slug, version.slug,
                                               page['path'], sect['id'])
                section_index_list.append({
                    'id': hashlib.md5(section_key.encode('utf-8')).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
            # Nothing to send for pages without sections.
            if section_index_list:
                section_obj.bulk_index(section_index_list,
                                       parent=page_id,
                                       routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        # Pages of this project/version that were not written by this commit.
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {
                            "term": {
                                "project": project.slug,
                            }
                        },
                        {
                            "term": {
                                "version": version.slug,
                            }
                        },
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
from readthedocs.search.indexes import Index, PageIndex, ProjectIndex, SectionIndex

# Provisioning script: runs at import/execution time and leaves ``index``,
# ``index_name``, ``proj``, ``page`` and ``sec`` bound at module level.

# Create a new timestamped index and point the aliases at it.
index = Index()
index_name = index.timestamped_index()
index.create_index(index_name)
index.update_aliases(index_name)
# Put the mapping for each document type: project, then page, then section.
proj = ProjectIndex()
proj.put_mapping()
page = PageIndex()
page.put_mapping()
sec = SectionIndex()
sec.put_mapping()