# Configure the module-wide logger once (the original fetched the same
# "pipelines" logger twice; one handle is enough for both setLevel and use).
logger = logging.getLogger("pipelines")
logger.setLevel(LOG_LEVEL)

# Haystack pipeline components are not pydantic models; allow arbitrary
# types inside pydantic schemas used by the endpoints below.
from pydantic import BaseConfig

BaseConfig.arbitrary_types_allowed = True

router = APIRouter()

# Load the query pipeline described by the YAML definition and keep a
# handle on its document store for the endpoints in this module.
PIPELINE = Pipeline.load_from_yaml(Path(PIPELINE_YAML_PATH), pipeline_name=QUERY_PIPELINE_NAME)
DOCUMENT_STORE = PIPELINE.get_document_store()
logger.info(f"Loaded pipeline nodes: {PIPELINE.graph.nodes.keys()}")

# Throttle simultaneous requests handled by this worker process.
concurrency_limiter = RequestLimiter(CONCURRENT_REQUEST_PER_WORKER)
# Bug fix: the original string lacked the f-prefix, so the literal text
# "{CONCURRENT_REQUEST_PER_WORKER}" was logged instead of the value.
# Also emit through the module logger for consistency, not the root logger.
logger.info(f"Concurrent requests per worker: {CONCURRENT_REQUEST_PER_WORKER}")


@router.get("/initialized")
def check_status():
    """
    This endpoint can be used during startup to understand if the
    server is ready to take any requests, or is still loading.

    The recommended approach is to call this endpoint with a short timeout,
    like 500ms, and in case of no reply, consider the server busy.
    """
    return True
# Tail of a response-model class whose ``class`` header lies in an earlier
# chunk of this file; ``meta`` carries optional per-answer metadata.
meta: Optional[Dict[str, Optional[str]]]


class AnswersToIndividualQuestion(BaseModel):
    # One submitted question together with the answers found for it; entries
    # may be None when no answer was produced for a slot.
    question: str
    answers: List[Optional[Answer]]


class Answers(BaseModel):
    # One result entry per question submitted in the request.
    results: List[AnswersToIndividualQuestion]


#############################################
# Endpoints
#############################################

# Cap the number of simultaneous /doc-qa requests handled by this worker.
doc_qa_limiter = RequestLimiter(CONCURRENT_REQUEST_PER_WORKER)


@router.post("/models/{model_id}/doc-qa", response_model=Answers, response_model_exclude_unset=True)
def doc_qa(model_id: int, request: Question):
    """
    Answer the submitted question(s) using the Finder registered under
    ``model_id``.

    :param model_id: key into the module-level ``FINDERS`` registry.
    :param request: incoming ``Question`` payload.
    :raises HTTPException: 404 when no Finder exists for ``model_id``.
    """
    # NOTE(review): the handler body appears to continue in a later chunk of
    # this file; only the lookup/validation prologue is visible here.
    with doc_qa_limiter.run():
        start_time = time.time()
        finder = FINDERS.get(model_id, None)
        if not finder:
            raise HTTPException(
                status_code=404,
                detail=f"Couldn't get Finder with ID {model_id}. Available IDs: {list(FINDERS.keys())}"
            )
# Tail of the ``Answer`` model whose ``class`` header lies in an earlier
# chunk: character offsets of the answer span plus provenance fields.
offset_end: int
offset_start_in_doc: Optional[int]
offset_end_in_doc: Optional[int]
document_id: Optional[str] = None
meta: Optional[Dict[str, str]]


class Response(BaseModel):
    # Echo of the original query together with the answers produced for it.
    query: str
    answers: List[Answer]


# Load the query pipeline from its YAML definition.
PIPELINE = Pipeline.load_from_yaml(Path(PIPELINE_YAML_PATH), pipeline_name=QUERY_PIPELINE_NAME)
logger.info(f"Loaded pipeline nodes: {PIPELINE.graph.nodes.keys()}")

# NOTE(review): the limit is hard-coded to 4 here, while other limiters in
# this file use CONCURRENT_REQUEST_PER_WORKER — confirm this is intentional.
concurrency_limiter = RequestLimiter(4)


@router.post("/query", response_model=Response)
def query(request: Request):
    """
    Answer ``request`` with the globally loaded query pipeline, serialized
    through the shared concurrency limiter.
    """
    with concurrency_limiter.run():
        result = _process_request(PIPELINE, request)
        return result


def _process_request(pipeline, request) -> Response:
    # NOTE(review): this function continues beyond the visible chunk; only
    # the start of its filter normalisation is shown here.
    start_time = time.time()
    filters = {}
    if request.filters:
        # put filter values into a list and remove filters with null value