Ejemplo n.º 1
0
class Autocomplete:
    """Prefix-based autocomplete over the nested ``texta_facts`` field.

    Every lookup builds a terms aggregation, hands it to the wrapped
    searcher through ``update_query`` and reads the bucket keys out of the
    ``aggregations`` section of the search response.
    """

    # Characters that carry special meaning in Elasticsearch (Lucene) regular
    # expressions, including the optional operators. They must be escaped so
    # that user-typed text is matched literally.
    _REGEX_RESERVED = frozenset('.?+*|{}[]()"\\#@&<>~')

    def __init__(self, project, indices, limit=10):
        """Create a searcher over *indices*.

        Args:
            project: Stored on the instance as ``self.project``; not read by
                the methods of this class.
            indices: Passed through to ``ElasticSearcher`` as ``indices``.
            limit: Maximum number of suggestions, used as the ``size`` of the
                suggestion-producing terms aggregations.
        """
        self.project = project
        self.limit = limit
        self.es = ElasticSearcher(output=ElasticSearcher.OUT_RAW, indices=indices)

    @classmethod
    def _prefix_pattern(cls, startswith):
        """Return a regex that matches terms beginning with the literal *startswith*.

        The prefix is converted with ``str()`` (as the f-string interpolation
        it replaces did) and every reserved regex character is backslash
        escaped, so input such as ``"a.b"`` or ``"foo("`` can neither widen
        the match nor produce an invalid pattern.
        """
        escaped = "".join(
            f"\\{char}" if char in cls._REGEX_RESERVED else char
            for char in str(startswith)
        )
        return f"{escaped}.*"

    def get_fact_names(self, startswith):
        """Return up to ``self.limit`` fact names that start with *startswith*.

        Args:
            startswith: Literal prefix the fact names must begin with.

        Returns:
            List of bucket keys from the ``texta_facts.fact`` terms aggregation.
        """
        query = {
            "aggs": {
                "fact": {
                    "nested": {"path": "texta_facts"},
                    "aggs": {
                        "fact": {
                            "terms": {
                                "field": "texta_facts.fact",
                                "size": self.limit,
                                "include": self._prefix_pattern(startswith),
                            }
                        }
                    },
                }
            }
        }

        self.es.update_query(query)
        results = self.es.search()

        return [bucket["key"] for bucket in results["aggregations"]["fact"]["fact"]["buckets"]]

    def get_fact_values(self, startswith, fact_name):
        """Return up to ``self.limit`` values of *fact_name* starting with *startswith*.

        Args:
            startswith: Literal prefix the fact values must begin with.
            fact_name: Name of the fact whose values are suggested.

        Returns:
            List of bucket keys from the ``texta_facts.str_val`` sub-aggregation
            of the bucket whose key equals *fact_name*; empty when there is no
            such bucket.
        """
        outer_terms = {"field": "texta_facts.fact"}
        # Restrict the outer aggregation to the requested fact. Without this
        # the aggregation only returns its default number of top buckets, so
        # a less frequent fact would never show up and yield no suggestions.
        if isinstance(fact_name, str):
            outer_terms["include"] = [fact_name]

        query = {
            "aggs": {
                "str_val": {
                    "nested": {"path": "texta_facts"},
                    "aggs": {
                        "str_val": {
                            "terms": outer_terms,
                            "aggs": {
                                "fact_values": {
                                    "terms": {
                                        "field": "texta_facts.str_val",
                                        "size": self.limit,
                                        "include": self._prefix_pattern(startswith),
                                    }
                                }
                            },
                        }
                    },
                }
            }
        }

        self.es.update_query(query)
        results = self.es.search()

        facts = []
        for bucket in results["aggregations"]["str_val"]["str_val"]["buckets"]:
            # Still compare keys so only the requested fact contributes.
            if bucket["key"] == fact_name:
                facts.extend(sub_bucket["key"] for sub_bucket in bucket["fact_values"]["buckets"])

        return facts

    def get_lexicons(self, startswith):
        """Placeholder for lexicon suggestions; not implemented, returns ``None``."""
        # TODO
        pass
Ejemplo n.º 2
0
    def post(self, request, project_pk: int):
        """Simplified search interface for making Elasticsearch queries."""
        # Validate the payload first; invalid input never reaches the project lookup.
        serializer = ProjectSimplifiedSearchSerializer(data=request.data)
        if not serializer.is_valid():
            raise SerializerNotValid(detail=serializer.errors)

        # Resolve the project and enforce object-level permissions on it.
        project = get_object_or_404(Project, pk=project_pk)
        self.check_object_permissions(request, project)
        project_indices = list(project.get_indices())
        project_fields = project.get_elastic_fields(path_list=True)
        params = serializer.validated_data

        # A project without indices has nothing to search.
        if not project_indices:
            raise ProjectValidationFailed(detail="Project has no indices")

        # Any requested index must be one of the project's indices.
        requested_indices = params['match_indices']
        if requested_indices and set(requested_indices) - set(project_indices):
            raise ProjectValidationFailed(detail=f"Index names are not valid for this project. allowed values are: {project_indices}")

        # Any requested field must be one of the project's fields.
        requested_fields = params['match_fields']
        if requested_fields and set(requested_fields) - set(project_fields):
            raise ProjectValidationFailed(detail=f"Fields names are not valid for this project. allowed values are: {project_fields}")

        # NOTE(review): match_indices and match_fields are only validated above;
        # the searcher below is built over all project indices and the filters
        # receive no field list - confirm this is intended.
        searcher = ElasticSearcher(indices=project_indices, output=ElasticSearcher.OUT_DOC)
        query = Query(operator=params['operator'])

        # Normalise match_text to a list of strings: a single string becomes a
        # one-element list, a list drops its falsy entries; anything else is
        # answered with HTTP 400.
        match_text = params['match_text']
        if isinstance(match_text, str):
            phrases = [match_text]
        elif isinstance(match_text, list):
            phrases = [str(entry) for entry in match_text if entry]
        else:
            return Response({'error': f'match text is in unknown format: {match_text}'}, status=status.HTTP_400_BAD_REQUEST)

        # One string filter per phrase.
        for phrase in phrases:
            query.add_string_filter(phrase, match_type=params["match_type"])

        # Hand the assembled query to the searcher and return what it finds.
        searcher.update_query(query.query)
        hits = searcher.search(size=params["size"])
        return Response(hits, status=status.HTTP_200_OK)
Ejemplo n.º 3
0
    def post(self, request, project_pk: int):
        """Executes **raw** Elasticsearch query on all project indices."""
        # Resolve the project and enforce object-level permissions before
        # looking at the payload.
        project = get_object_or_404(Project, pk=project_pk)
        self.check_object_permissions(request, project)

        serializer = ProjectSearchByQuerySerializer(data=request.data)
        if not serializer.is_valid():
            raise SerializerNotValid(detail=serializer.errors)
        payload = serializer.validated_data

        # The project decides which indices are searched, based on the
        # indices named in the request.
        indices = project.get_available_or_all_project_indices(payload["indices"])
        if not indices:
            raise ProjectValidationFailed(detail="No indices supplied and project has no indices")

        # Use the requested output type, or fall back to
        # OUT_DOC_WITH_TOTAL_HL_AGGS when none was given.
        output = payload["output_type"] or ElasticSearcher.OUT_DOC_WITH_TOTAL_HL_AGGS
        searcher = ElasticSearcher(indices=indices, output=output)

        # The query from the request is passed to the searcher unchanged.
        searcher.update_query(payload["query"])
        return Response(searcher.search(), status=status.HTTP_200_OK)