class Autocomplete:
    """Prefix-based autocomplete over the nested ``texta_facts`` field.

    Each lookup builds a terms aggregation, hands it to the wrapped
    ``ElasticSearcher`` and reads the bucket keys out of the response.
    """

    # Characters that carry special meaning in Elasticsearch (Lucene) regular
    # expressions, including the optional operators (# @ & < > ~). They must be
    # backslash-escaped for user input to be matched literally.
    _REGEX_RESERVED = frozenset('.?+*|{}[]()"\\#@&<>~')

    def __init__(self, project, indices, limit=10):
        """Create an autocompleter.

        :param project: Stored on the instance; not read by the methods in this class.
        :param indices: Indices passed on to ``ElasticSearcher``.
        :param limit: Maximum number of suggestions requested per lookup.
        """
        self.project = project
        self.limit = limit
        self.es = ElasticSearcher(output=ElasticSearcher.OUT_RAW, indices=indices)

    @classmethod
    def _prefix_pattern(cls, startswith):
        """Return a regex matching every term that literally starts with ``startswith``."""
        escaped = "".join(
            f"\\{char}" if char in cls._REGEX_RESERVED else char
            for char in str(startswith)
        )
        return f"{escaped}.*"

    def get_fact_names(self, startswith):
        """Return up to ``limit`` fact names that begin with ``startswith``.

        :param startswith: Literal prefix typed by the user.
        :return: List of matching ``texta_facts.fact`` bucket keys.
        """
        query = {
            "aggs": {
                "fact": {
                    "nested": {"path": "texta_facts"},
                    "aggs": {
                        "fact": {
                            "terms": {
                                "field": "texta_facts.fact",
                                "size": self.limit,
                                "include": self._prefix_pattern(startswith),
                            }
                        }
                    },
                }
            }
        }
        self.es.update_query(query)
        results = self.es.search()
        return [bucket["key"] for bucket in results["aggregations"]["fact"]["fact"]["buckets"]]

    def get_fact_values(self, startswith, fact_name):
        """Return up to ``limit`` values of ``fact_name`` that begin with ``startswith``.

        :param startswith: Literal prefix typed by the user.
        :param fact_name: Exact name of the fact whose values are suggested.
        :return: List of matching ``texta_facts.str_val`` bucket keys.
        """
        query = {
            "aggs": {
                "str_val": {
                    "nested": {"path": "texta_facts"},
                    "aggs": {
                        "str_val": {
                            # Restrict the outer buckets to the requested fact.
                            # Without this only the most frequent fact names
                            # are returned and a rarer fact would silently
                            # produce no suggestions.
                            "terms": {"field": "texta_facts.fact", "include": [fact_name]},
                            "aggs": {
                                "fact_values": {
                                    "terms": {
                                        "field": "texta_facts.str_val",
                                        "size": self.limit,
                                        "include": self._prefix_pattern(startswith),
                                    }
                                }
                            },
                        }
                    },
                }
            }
        }
        self.es.update_query(query)
        results = self.es.search()

        values = []
        for bucket in results["aggregations"]["str_val"]["str_val"]["buckets"]:
            # Kept as a safeguard even though the query already filters by name.
            if bucket["key"] == fact_name:
                values.extend(sub_bucket["key"] for sub_bucket in bucket["fact_values"]["buckets"])
        return values

    def get_lexicons(self, startswith):
        """Lexicon autocomplete; not implemented yet, so this returns ``None``."""
        # TODO
        pass
def post(self, request, project_pk: int):
    """Simplified search interface for making Elasticsearch queries.

    Validates the request body, checks that any requested indices and fields
    belong to the project, builds a string-filter query from ``match_text``
    and returns the matching documents.

    :param request: Incoming request; ``request.data`` holds the search parameters.
    :param project_pk: Primary key of the project to search in.
    :return: ``Response`` with the search results (HTTP 200), or an error
        payload (HTTP 400) when ``match_text`` is neither a string nor a list.
    :raises SerializerNotValid: If the request body fails validation.
    :raises ProjectValidationFailed: If the project has no indices, or the
        requested indices/fields are not part of the project.
    """
    # Resolve the project and authorise the caller before looking at the
    # request body, so unauthorised callers get no validation feedback.
    project_object = get_object_or_404(Project, pk=project_pk)
    self.check_object_permissions(request, project_object)

    serializer = ProjectSimplifiedSearchSerializer(data=request.data)
    if not serializer.is_valid():
        raise SerializerNotValid(detail=serializer.errors)
    params = serializer.validated_data

    project_indices = list(project_object.get_indices())
    project_fields = project_object.get_elastic_fields(path_list=True)

    # test if indices exist
    if not project_indices:
        raise ProjectValidationFailed(detail="Project has no indices")

    # test if indices are valid
    match_indices = params['match_indices']
    if match_indices and not set(match_indices).issubset(set(project_indices)):
        raise ProjectValidationFailed(detail=f"Index names are not valid for this project. allowed values are: {project_indices}")

    # test if fields are valid
    # NOTE(review): match_fields is validated here but never applied to the
    # query below - confirm whether the string filters should be limited to it.
    match_fields = params['match_fields']
    if match_fields and not set(match_fields).issubset(set(project_fields)):
        raise ProjectValidationFailed(detail=f"Fields names are not valid for this project. allowed values are: {project_fields}")

    # Search only the requested indices when given; otherwise search all
    # project indices.
    search_indices = list(match_indices) if match_indices else project_indices
    es = ElasticSearcher(indices=search_indices, output=ElasticSearcher.OUT_DOC)
    q = Query(operator=params['operator'])

    # if input is string, convert to list
    # if unknown format, return error
    match_text = params['match_text']
    if isinstance(match_text, list):
        # Empty/falsy entries are dropped; the rest are coerced to strings.
        match_texts = [str(item) for item in match_text if item]
    elif isinstance(match_text, str):
        match_texts = [match_text]
    else:
        return Response({'error': f'match text is in unknown format: {match_text}'}, status=status.HTTP_400_BAD_REQUEST)

    # add query filters
    for item in match_texts:
        q.add_string_filter(item, match_type=params["match_type"])

    # update query
    es.update_query(q.query)

    # retrieve results
    results = es.search(size=params["size"])
    return Response(results, status=status.HTTP_200_OK)
def post(self, request, project_pk: int):
    """Executes **raw** Elasticsearch query on all project indices.

    :param request: Incoming request; ``request.data`` carries the query,
        the optional index selection and the optional output type.
    :param project_pk: Primary key of the project to search in.
    :return: ``Response`` with the search results (HTTP 200).
    :raises SerializerNotValid: If the request body fails validation.
    :raises ProjectValidationFailed: If no usable indices can be resolved.
    """
    project = get_object_or_404(Project, pk=project_pk)
    self.check_object_permissions(request, project)

    serializer = ProjectSearchByQuerySerializer(data=request.data)
    if not serializer.is_valid():
        raise SerializerNotValid(detail=serializer.errors)
    payload = serializer.validated_data

    indices = project.get_available_or_all_project_indices(payload["indices"])
    if not indices:
        raise ProjectValidationFailed(detail="No indices supplied and project has no indices")

    # Use the requested output type, falling back to the default when the
    # request leaves it empty.
    output_type = payload["output_type"] or ElasticSearcher.OUT_DOC_WITH_TOTAL_HL_AGGS
    searcher = ElasticSearcher(indices=indices, output=output_type)
    searcher.update_query(payload["query"])

    return Response(searcher.search(), status=status.HTTP_200_OK)