Ejemplo n.º 1
0
from collections import OrderedDict
from datetime import datetime
from typing import List
from uuid import uuid4

import dateparser
from fastapi import APIRouter, Request

from app.models import (SearchArticle, SearchLogData, SearchLogType,
                        SearchQueryResponse, SearchVertical)
from app.settings import settings
from app.util.logging import build_timed_logger
from app.util.request import get_doc_url, get_multivalued_field, get_request_ip, get_author_field

router = APIRouter()
# Logger used by this module; 'search.log' is the second argument handed to
# the project's build_timed_logger helper.
search_logger = build_timed_logger('search_logger', 'search.log')


@router.get('/search', response_model=SearchQueryResponse)
async def get_search(request: Request, query: str, vertical: SearchVertical):
    """Handle ``GET /search`` for ``query`` within the given ``vertical``.

    The searcher stored on ``request.app.state`` is queried first. If that
    call raises, the handler carries on with an empty hit list instead of
    failing the request.
    """
    # Get search results from Lucene index.
    try:
        searcher_hits = request.app.state.searcher.search(query, vertical)
    except Exception:
        # Sometimes errors out due to encoding bugs. Catch Exception rather
        # than using a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        searcher_hits = []

    # Get predictions from T5.
    # NOTE(review): the visible code simply reuses each hit's own score here;
    # no separate T5 call is made in this part of the function.
    t5_scores = [hit.score for hit in searcher_hits]

    # Sort results by T5 scores.
Ejemplo n.º 2
0
import subprocess

from collections import OrderedDict
from datetime import datetime
from typing import List
from uuid import uuid4

import dateparser
from app.models import SearchArticle, SearchLogData, SearchLogType, SearchQueryResponse
from app.settings import settings
from app.util.logging import build_timed_logger
from app.util.request import get_request_ip, populate_article
from fastapi import APIRouter, Request

router = APIRouter()
# Logger used by this module; "search.log" is the second argument handed to
# the project's build_timed_logger helper.
search_logger = build_timed_logger("search_logger", "search.log")


@router.get("/search", response_model=SearchQueryResponse)
async def get_search(request: Request, query: str):
    """Handle ``GET /search`` for ``query``.

    The visible part of the handler runs the ``pyserini.index`` command to
    build ``indexes/sample_collection_jsonl`` and then replaces the
    ``searcher`` attribute of ``request.app.state.searcher`` with a
    ``SimpleSearcher`` opened on that index.
    """
    # Get search results from Lucene index.

    # Pass the command as an argument list. A single command string without
    # ``shell=True`` is treated on POSIX as the name of the program to run,
    # so the original call could not start the indexer there.
    # NOTE(review): subprocess.call blocks until the command finishes, which
    # stalls the event loop inside this async handler -- confirm this is
    # acceptable.
    rc = subprocess.call([
        "python", "-m", "pyserini.index",
        "-collection", "JsonCollection",
        "-generator", "DefaultLuceneDocumentGenerator",
        "-threads", "1",
        "-input", "integrations/resources/sample_collection_jsonl",
        "-index", "indexes/sample_collection_jsonl",
        "-storePositions", "-storeDocvectors", "-storeRaw",
    ])

    # NOTE(review): SimpleSearcher is not imported in the visible part of
    # this module -- confirm it is in scope.
    request.app.state.searcher.searcher = SimpleSearcher(
        'indexes/sample_collection_jsonl')
Ejemplo n.º 3
0
import json
from datetime import datetime
from typing import List
from uuid import uuid4

from fastapi import APIRouter, HTTPException, Request

from app.models import (RelatedArticle, RelatedQueryResponse, SearchLogData,
                        SearchLogType)
from app.services.related_searcher import related_searcher
from app.settings import settings
from app.util.logging import build_timed_logger
from app.util.request import get_request_ip

router = APIRouter()
# Logger used by this module; its second argument comes from
# settings.related_log_path.
related_logger = build_timed_logger('related_logger',
                                    settings.related_log_path)


@router.get('/related/{uid}', response_model=RelatedQueryResponse)
async def get_related(request: Request,
                      uid: str,
                      page_number: int = 1,
                      query_id: str = None):
    """Handle ``GET /related/{uid}``.

    Raises:
        HTTPException: 404 ("Item not found") when ``uid`` is not contained
            in ``related_searcher.index_to_uid``.
    """
    # Invalid uid -> 404
    # NOTE(review): the name ``index_to_uid`` suggests an index -> uid
    # container; if it is a dict, ``in`` tests its keys rather than the
    # uids -- confirm against related_searcher.
    if uid not in related_searcher.index_to_uid:
        raise HTTPException(status_code=404, detail="Item not found")

    # Embedding stored for the requested uid, plus an empty list for results.
    source_vector = related_searcher.embedding[uid]
    related_results = []

    # HNSW parameters.
Ejemplo n.º 4
0
from typing import List
from uuid import uuid4

import dateparser
from fastapi import APIRouter, Request

from app.models import (Article, SearchLogData, SearchLogType,
                        SearchQueryResponse, SearchVertical)
from app.services.highlighter import highlighter
from app.services.ranker import ranker
from app.services.searcher import searcher
from app.settings import settings
from app.util.logging import build_timed_logger

router = APIRouter()
# Logger used by this module; its second argument comes from
# settings.search_log_path.
search_logger = build_timed_logger('search_logger', settings.search_log_path)


@router.get('/search', response_model=SearchQueryResponse)
async def get_search(request: Request,
                     query: str,
                     vertical: SearchVertical = SearchVertical.cord19):
    """Handle ``GET /search`` for ``query``.

    ``vertical`` defaults to ``SearchVertical.cord19``. The module-level
    ``searcher`` service is queried first; if that call raises, the handler
    carries on with an empty hit list instead of failing the request.
    """
    # Get search results from Lucene index.
    try:
        searcher_hits = searcher.search(query)
    except Exception:
        # Sometimes errors out due to encoding bugs. Catch Exception rather
        # than using a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        searcher_hits = []

    # Only rerank based on paragraph or abstract if original document was retrieved.
    ranked_paragraphs = [
Ejemplo n.º 5
0
import json
from datetime import datetime
from typing import List
from uuid import uuid4

from app.models import (RelatedArticle, RelatedQueryResponse, SearchLogData,
                        SearchLogType)
from app.settings import settings
from app.util.logging import build_timed_logger
from app.util.request import get_request_ip, populate_article
from fastapi import APIRouter, HTTPException, Request

router = APIRouter()
# Logger used by this module; "related.log" is the second argument handed to
# the project's build_timed_logger helper.
related_logger = build_timed_logger("related_logger", "related.log")


@router.get("/related/{uid}", response_model=RelatedQueryResponse)
async def get_related(request: Request,
                      uid: str,
                      page_number: int = 1,
                      query_id: str = None):
    """Handle ``GET /related/{uid}``.

    Raises:
        HTTPException: 404 ("Related search not enabled") when
            ``settings.related_search`` is falsy, and 404 ("Item not found")
            when ``uid`` is not in ``related_searcher.uid_set``.
    """
    # Feature switch: refuse the request when related search is turned off.
    if not settings.related_search:
        raise HTTPException(status_code=404,
                            detail="Related search not enabled")

    # Both searchers are read from the application state of this request.
    searcher = request.app.state.searcher
    related_searcher = request.app.state.related_searcher

    # Invalid uid -> 404
    if uid not in related_searcher.uid_set:
        raise HTTPException(status_code=404, detail="Item not found")
Ejemplo n.º 6
0
import json
from datetime import datetime
from typing import List
from uuid import uuid4

from fastapi import APIRouter, HTTPException, Request

from app.models import (RelatedArticle, RelatedQueryResponse, SearchLogData,
                        SearchLogType, SearchVertical)
from app.settings import settings
from app.util.logging import build_timed_logger
from app.util.request import get_request_ip

router = APIRouter()
related_logger = build_timed_logger('related_logger', 'related.log')


@router.get('/related/{uid}', response_model=RelatedQueryResponse)
async def get_related(request: Request,
                      uid: str,
                      page_number: int = 1,
                      query_id: str = None):
    searcher = request.app.state.searcher
    related_searcher = request.app.state.related_searcher

    # Invalid uid -> 404
    if uid not in related_searcher.uid_set:
        raise HTTPException(status_code=404, detail="Item not found")

    source_vector = related_searcher.embedding[uid]
    related_results = []