def tag_random_doc(self, request, pk=None, project_pk=None):
    """Return the tagger's prediction for a random Elasticsearch document.

    The request body is validated with ``TagRandomDocSerializer``; the
    index names it carries are resolved through
    ``get_available_or_all_indices`` on the tagger object.

    Args:
        request: Incoming request whose ``data`` holds the ``indices`` list.
        pk: Identifier taken from the URL (not read directly here).
        project_pk: Project identifier from the URL (not read directly here).

    Returns:
        ``Response`` with ``{"document": ..., "prediction": ...}`` and
        HTTP 200 on success, or an ``{"error": ...}`` payload with HTTP 400
        when an index is missing or no document could be retrieved.

    Raises:
        NonExistantModelError: If the tagger has no model file path.
    """
    tagger_object = self.get_object()

    # A tagger without a stored model file cannot predict anything.
    if not tagger_object.model.path:
        raise NonExistantModelError()

    serializer = TagRandomDocSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    indices = [index["name"] for index in serializer.validated_data["indices"]]
    indices = tagger_object.get_available_or_all_indices(indices)

    # Field names are stored on the tagger as a JSON-encoded string.
    tagger_fields = json.loads(tagger_object.fields)

    if not ElasticCore().check_if_indices_exist(indices):
        return Response(
            {'error': f'One or more index from {list(indices)} do not exist'},
            status=status.HTTP_400_BAD_REQUEST,
        )

    # Guard against an empty result (e.g. indices without documents):
    # indexing [0] on it would surface as an unhandled IndexError.
    random_docs = ElasticSearcher(indices=indices).random_documents(size=1)
    if not random_docs:
        return Response(
            {'error': f'No documents found in indices {list(indices)}'},
            status=status.HTTP_400_BAD_REQUEST,
        )
    random_doc = random_docs[0]

    # Only the fields listed on the tagger are passed to it.
    random_doc_filtered = {
        k: v for k, v in random_doc.items() if k in tagger_fields
    }

    tagger_response = apply_tagger(
        tagger_object.id, random_doc_filtered, input_type='doc'
    )
    response = {"document": random_doc, "prediction": tagger_response}
    return Response(response, status=status.HTTP_200_OK)
def tag_random_doc(self, request, pk=None, project_pk=None):
    """Tag a random Elasticsearch document with the hybrid tagger group.

    Field names and index resolution are taken from the first tagger in
    the group. The selected document fields are joined into one text,
    passed through ``get_mlp``, used to look up tag candidates, and then
    handed to ``apply_tagger_group``.

    Args:
        request: Incoming request whose ``data`` holds the ``indices`` list.
        pk: Identifier taken from the URL (not read directly here).
        project_pk: Project identifier from the URL (not read directly here).

    Returns:
        ``Response`` with ``{"document": ..., "tags": ...}`` and HTTP 200 on
        success, or an ``{"error": ...}`` payload with HTTP 400 when an
        index is missing or no document could be retrieved.

    Raises:
        NonExistantModelError: If no tagger in the group has a completed task.
        RedisNotAvailable: If the Redis status check reports it is not alive.
    """
    logging.getLogger(INFO_LOGGER).info(
        "[Tag Random doc] Starting tag_random_doc...")

    hybrid_tagger_object = self.get_object()

    # At least one tagger in the group must have finished its task.
    # exists() avoids loading the matching rows just to test for presence.
    if not hybrid_tagger_object.taggers.filter(
            task__status=Task.STATUS_COMPLETED).exists():
        raise NonExistantModelError()

    # Field names are read from the first tagger, stored as a JSON string.
    first_tagger = hybrid_tagger_object.taggers.first()
    tagger_fields = json.loads(first_tagger.fields)

    if not get_redis_status()['alive']:
        raise RedisNotAvailable(
            'Redis not available. Check if Redis is running.')

    serializer = TagRandomDocSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    indices = [index["name"] for index in serializer.validated_data["indices"]]
    indices = first_tagger.get_available_or_all_indices(indices)

    if not ElasticCore().check_if_indices_exist(indices):
        return Response(
            {'error': f'One or more index from {list(indices)} does not exist'},
            status=status.HTTP_400_BAD_REQUEST,
        )

    # Guard against an empty result (e.g. indices without documents):
    # indexing [0] on it would surface as an unhandled IndexError.
    random_docs = ElasticSearcher(indices=indices).random_documents(size=1)
    if not random_docs:
        return Response(
            {'error': f'No documents found in indices {list(indices)}'},
            status=status.HTTP_400_BAD_REQUEST,
        )
    random_doc = random_docs[0]

    # Only the fields listed on the first tagger are used for tagging.
    random_doc_filtered = {
        k: v for k, v in random_doc.items() if k in tagger_fields
    }

    # Reuse the object fetched above instead of calling get_object() again.
    tagger_group_id = hybrid_tagger_object.pk

    # Combine the selected field values into one newline-separated string.
    combined_texts = '\n'.join(random_doc_filtered.values())
    combined_texts, tags = get_mlp(
        tagger_group_id, combined_texts, lemmatize=False)

    # Tags already returned by get_mlp are excluded from the candidates.
    tag_candidates = get_tag_candidates(
        tagger_group_id, combined_texts, ignore_tags=tags)

    tags += apply_tagger_group(
        tagger_group_id,
        random_doc_filtered,
        tag_candidates,
        request,
        input_type='doc',
    )

    response = {"document": random_doc, "tags": tags}
    return Response(response, status=status.HTTP_200_OK)