import logging
from collections import OrderedDict
from datetime import datetime
from typing import List
from uuid import uuid4

import dateparser
from fastapi import APIRouter, Request

from app.models import (SearchArticle, SearchLogData, SearchLogType,
                        SearchQueryResponse, SearchVertical)
from app.settings import settings
from app.util.logging import build_timed_logger
from app.util.request import get_doc_url, get_multivalued_field, get_request_ip, get_author_field

router = APIRouter()
search_logger = build_timed_logger('search_logger', 'search.log')


@router.get('/search', response_model=SearchQueryResponse)
async def get_search(request: Request, query: str, vertical: SearchVertical):
    """Handle ``GET /search``.

    Runs ``query`` for the given ``vertical`` through the searcher stored on
    ``request.app.state``. A failing search is treated as "no hits" rather
    than an error response.
    """
    # Get search results from Lucene index.
    try:
        searcher_hits = request.app.state.searcher.search(query, vertical)
    except Exception:
        # Sometimes errors out due to encoding bugs. Deliberately best-effort:
        # fall back to an empty result list, but record the failure. Catching
        # Exception (not a bare except) lets task cancellation and
        # interpreter-exit signals propagate.
        logging.getLogger(__name__).exception('Search failed for query %r', query)
        searcher_hits = []

    # Get predictions from T5.
    t5_scores = [hit.score for hit in searcher_hits]

    # Sort results by T5 scores.
import subprocess
from collections import OrderedDict
from datetime import datetime
from typing import List
from uuid import uuid4

import dateparser
from app.models import SearchArticle, SearchLogData, SearchLogType, SearchQueryResponse
from app.settings import settings
from app.util.logging import build_timed_logger
from app.util.request import get_request_ip, populate_article
from fastapi import APIRouter, Request

router = APIRouter()
search_logger = build_timed_logger("search_logger", "search.log")


@router.get("/search", response_model=SearchQueryResponse)
async def get_search(request: Request, query: str):
    """Handle ``GET /search``.

    Runs the ``pyserini.index`` module in a child process over the sample
    JSONL collection, then replaces ``request.app.state.searcher.searcher``
    with a searcher opened on the resulting index directory.
    """
    # Build the Lucene index. The command is passed as an argument list:
    # without shell=True a single command string is treated as the name of
    # the executable on POSIX and the call fails before pyserini ever runs.
    # NOTE(review): this is a blocking call inside an async handler and it
    # runs on every request -- confirm that is intended.
    rc = subprocess.call(
        [
            "python", "-m", "pyserini.index",
            "-collection", "JsonCollection",
            "-generator", "DefaultLuceneDocumentGenerator",
            "-threads", "1",
            "-input", "integrations/resources/sample_collection_jsonl",
            "-index", "indexes/sample_collection_jsonl",
            "-storePositions",
            "-storeDocvectors",
            "-storeRaw",
        ]
    )
    # NOTE(review): SimpleSearcher is not among the visible imports --
    # presumably pyserini's searcher class; confirm and import it.
    request.app.state.searcher.searcher = SimpleSearcher(
        'indexes/sample_collection_jsonl')
import json
from datetime import datetime
from typing import List, Optional
from uuid import uuid4

from fastapi import APIRouter, HTTPException, Request

from app.models import (RelatedArticle, RelatedQueryResponse, SearchLogData,
                        SearchLogType)
from app.services.related_searcher import related_searcher
from app.settings import settings
from app.util.logging import build_timed_logger
from app.util.request import get_request_ip

router = APIRouter()
related_logger = build_timed_logger('related_logger', settings.related_log_path)


@router.get('/related/{uid}', response_model=RelatedQueryResponse)
async def get_related(request: Request, uid: str, page_number: int = 1,
                      query_id: Optional[str] = None):
    """Handle ``GET /related/{uid}``.

    Looks up the embedding stored for ``uid`` in the module-level
    ``related_searcher``.

    Raises:
        HTTPException: 404 when ``uid`` is not found in
            ``related_searcher.index_to_uid``.
    """
    # Invalid uid -> 404
    # NOTE(review): if index_to_uid is a mapping keyed by index, this test
    # checks indices rather than uids -- confirm against the searcher.
    if uid not in related_searcher.index_to_uid:
        raise HTTPException(status_code=404, detail="Item not found")

    source_vector = related_searcher.embedding[uid]
    related_results = []

    # HNSW parameters.
from typing import List from uuid import uuid4 import dateparser from fastapi import APIRouter, Request from app.models import (Article, SearchLogData, SearchLogType, SearchQueryResponse, SearchVertical) from app.services.highlighter import highlighter from app.services.ranker import ranker from app.services.searcher import searcher from app.settings import settings from app.util.logging import build_timed_logger router = APIRouter() search_logger = build_timed_logger('search_logger', settings.search_log_path) @router.get('/search', response_model=SearchQueryResponse) async def get_search(request: Request, query: str, vertical: SearchVertical = SearchVertical.cord19): # Get search results from Lucene index. try: searcher_hits = searcher.search(query) except: # Sometimes errors out due to encoding bugs. searcher_hits = [] # Only rerank based on paragraph or abstract if original document was retrieved. ranked_paragraphs = [
import json
from datetime import datetime
from typing import List, Optional
from uuid import uuid4

from app.models import (RelatedArticle, RelatedQueryResponse, SearchLogData,
                        SearchLogType)
from app.settings import settings
from app.util.logging import build_timed_logger
from app.util.request import get_request_ip, populate_article
from fastapi import APIRouter, HTTPException, Request

router = APIRouter()
related_logger = build_timed_logger("related_logger", "related.log")


@router.get("/related/{uid}", response_model=RelatedQueryResponse)
async def get_related(request: Request, uid: str, page_number: int = 1,
                      query_id: Optional[str] = None):
    """Handle ``GET /related/{uid}``.

    Raises:
        HTTPException: 404 when ``settings.related_search`` is falsy, or when
            ``uid`` is not in the related searcher's ``uid_set``.
    """
    # Feature switch: the endpoint answers 404 while related search is off.
    if not settings.related_search:
        raise HTTPException(status_code=404, detail="Related search not enabled")

    # Both searchers are read from the application state.
    searcher = request.app.state.searcher
    related_searcher = request.app.state.related_searcher

    # Invalid uid -> 404
    if uid not in related_searcher.uid_set:
        raise HTTPException(status_code=404, detail="Item not found")
import json
from datetime import datetime
from typing import List, Optional
from uuid import uuid4

from fastapi import APIRouter, HTTPException, Request

from app.models import (RelatedArticle, RelatedQueryResponse, SearchLogData,
                        SearchLogType, SearchVertical)
from app.settings import settings
from app.util.logging import build_timed_logger
from app.util.request import get_request_ip

router = APIRouter()
related_logger = build_timed_logger('related_logger', 'related.log')


@router.get('/related/{uid}', response_model=RelatedQueryResponse)
async def get_related(request: Request, uid: str, page_number: int = 1,
                      query_id: Optional[str] = None):
    """Handle ``GET /related/{uid}``.

    Looks up the embedding stored for ``uid`` in the related searcher held on
    ``request.app.state``.

    Raises:
        HTTPException: 404 when ``uid`` is not in the related searcher's
            ``uid_set``.
    """
    # Both searchers are read from the application state.
    searcher = request.app.state.searcher
    related_searcher = request.app.state.related_searcher

    # Invalid uid -> 404
    if uid not in related_searcher.uid_set:
        raise HTTPException(status_code=404, detail="Item not found")

    source_vector = related_searcher.embedding[uid]
    related_results = []