Esempio n. 1
0
    def tag_random_doc(self, request, pk=None, project_pk=None):
        """Return the tagger's prediction for one random Elasticsearch document.

        The request body is validated with ``TagRandomDocSerializer``; the
        names of its ``indices`` entries are resolved through
        ``get_available_or_all_indices`` on the tagger object. One random
        document is fetched from those indices, reduced to the keys listed in
        the tagger's ``fields`` and passed to ``apply_tagger``.

        Args:
            request: Incoming request; ``request.data`` is fed to the serializer.
            pk: Not read in this body (presumably the URL kwarg used by
                ``get_object`` -- confirm against the router).
            project_pk: Not read in this body.

        Returns:
            ``Response`` with status 200 and a dict holding the unfiltered
            random document under ``"document"`` and the tagger output under
            ``"prediction"``; or a status 400 ``Response`` with an ``"error"``
            message when ``check_if_indices_exist`` reports a falsy result.

        Raises:
            NonExistantModelError: If ``tagger_object.model.path`` is falsy.
            Whatever ``serializer.is_valid(raise_exception=True)`` raises on
            invalid request data.
        """
        tagger_object = self.get_object()

        # A tagger without a model path cannot predict. This guard used to be
        # followed by a second, identical check returning a 400 response; that
        # branch could never run, so only the raising check remains.
        if not tagger_object.model.path:
            raise NonExistantModelError()

        serializer = TagRandomDocSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        indices = [
            index["name"] for index in serializer.validated_data["indices"]
        ]
        indices = tagger_object.get_available_or_all_indices(indices)

        # the tagger stores its field names as a JSON-encoded string
        tagger_fields = json.loads(tagger_object.fields)
        if not ElasticCore().check_if_indices_exist(indices):
            return Response(
                {
                    'error':
                    f'One or more index from {list(indices)} do not exist'
                },
                status=status.HTTP_400_BAD_REQUEST)

        # retrieve random document
        random_doc = ElasticSearcher(indices=indices).random_documents(
            size=1)[0]

        # only the tagger's own fields are sent for prediction; the response
        # still carries the complete document
        random_doc_filtered = {
            k: v
            for k, v in random_doc.items() if k in tagger_fields
        }

        # apply tagger
        tagger_response = apply_tagger(tagger_object.id,
                                       random_doc_filtered,
                                       input_type='doc')
        response = {"document": random_doc, "prediction": tagger_response}
        return Response(response, status=status.HTTP_200_OK)
Esempio n. 2
0
    def tag_random_doc(self, request, pk=None, project_pk=None):
        """
        API endpoint for tagging a random document.

        The request body is validated with ``TagRandomDocSerializer``; the
        names of its ``indices`` entries are resolved through
        ``get_available_or_all_indices`` on the group's first tagger. One
        random document is fetched, reduced to the keys listed in that first
        tagger's ``fields``, and its values are joined with newlines before
        being passed through ``get_mlp``, ``get_tag_candidates`` and
        ``apply_tagger_group``.

        Args:
            request: Incoming request; ``request.data`` is fed to the
                serializer and the request itself is forwarded to
                ``apply_tagger_group``.
            pk: Not read in this body (presumably the URL kwarg used by
                ``get_object`` -- confirm against the router).
            project_pk: Not read in this body.

        Returns:
            ``Response`` with status 200 and a dict holding the unfiltered
            random document under ``"document"`` and the collected tags under
            ``"tags"``; or a status 400 ``Response`` with an ``"error"``
            message when ``check_if_indices_exist`` reports a falsy result.

        Raises:
            NonExistantModelError: If no tagger in the group has a task with
                status ``Task.STATUS_COMPLETED``.
            RedisNotAvailable: If ``get_redis_status()['alive']`` is falsy.
            Whatever ``serializer.is_valid(raise_exception=True)`` raises on
            invalid request data.
        """
        # plain literal: the message has no placeholders to interpolate
        logging.getLogger(INFO_LOGGER).info(
            "[Tag Random doc] Starting tag_random_doc...")
        # get hybrid tagger object
        hybrid_tagger_object = self.get_object()

        # check if any of the models ready; exists() answers that without
        # loading the matching rows
        if not hybrid_tagger_object.taggers.filter(
                task__status=Task.STATUS_COMPLETED).exists():
            raise NonExistantModelError()

        # retrieve tagger fields from the first object
        first_tagger = hybrid_tagger_object.taggers.first()
        tagger_fields = json.loads(first_tagger.fields)

        # error if redis not available
        if not get_redis_status()['alive']:
            raise RedisNotAvailable(
                'Redis not available. Check if Redis is running.')

        serializer = TagRandomDocSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        indices = [
            index["name"] for index in serializer.validated_data["indices"]
        ]
        indices = first_tagger.get_available_or_all_indices(indices)

        if not ElasticCore().check_if_indices_exist(indices):
            return Response(
                {
                    'error':
                    f'One or more index from {list(indices)} does not exist'
                },
                status=status.HTTP_400_BAD_REQUEST)

        # retrieve random document
        random_doc = ElasticSearcher(indices=indices).random_documents(
            size=1)[0]
        # filter out correct fields from the document
        random_doc_filtered = {
            k: v
            for k, v in random_doc.items() if k in tagger_fields
        }

        # reuse the object fetched above instead of calling get_object() again
        tagger_group_id = hybrid_tagger_object.pk

        # combine document field values into one string
        # NOTE(review): str.join requires every selected field value to be a
        # string -- confirm the tagger fields are always text fields.
        combined_texts = '\n'.join(random_doc_filtered.values())
        combined_texts, tags = get_mlp(tagger_group_id,
                                       combined_texts,
                                       lemmatize=False)
        # retrieve tag candidates, skipping the tags get_mlp already returned
        tag_candidates = get_tag_candidates(tagger_group_id,
                                            combined_texts,
                                            ignore_tags=tags)
        # extend the get_mlp tags with those produced by the tagger group
        tags += apply_tagger_group(tagger_group_id,
                                   random_doc_filtered,
                                   tag_candidates,
                                   request,
                                   input_type='doc')
        # return document with tags
        response = {"document": random_doc, "tags": tags}
        return Response(response, status=status.HTTP_200_OK)