Beispiel #1
0
 def test_query_only(self):
     """A submission with a query but no searchtype must fail validation."""
     # Only the query value is supplied; the searchtype field is absent.
     params = MultiDict({'query': 'someone monkeyed with the request'})
     form = SimpleSearchForm(params)
     is_valid = form.validate()
     self.assertFalse(is_valid, "Form should be invalid")
Beispiel #2
0
 def test_searchtype_only(self):
     """A submission with a searchtype but no query must fail validation."""
     # Only the field selector is supplied; there is no query value.
     params = MultiDict({'searchtype': 'title'})
     form = SimpleSearchForm(params)
     is_valid = form.validate()
     self.assertFalse(is_valid, "Form should be invalid")
Beispiel #3
0
 def test_querystring_has_wildcard_at_start(self):
     """A query whose first character is ``*`` must fail validation."""
     fields = {'searchtype': 'title', 'query': '*foo title'}
     form = SimpleSearchForm(MultiDict(fields))
     is_valid = form.validate()
     self.assertFalse(is_valid, "Form should be invalid")
Beispiel #4
0
 def test_query_and_searchtype(self):
     """A submission with both searchtype and query must validate."""
     fields = {'searchtype': 'title', 'query': 'foo title'}
     form = SimpleSearchForm(MultiDict(fields))
     is_valid = form.validate()
     self.assertTrue(is_valid, "Form should be valid")
Beispiel #5
0
 def test_input_whitespace_is_stripped(self):
     """Leading and trailing whitespace in the query should be dropped."""
     # The query value is padded with one space on each side.
     fields = {'searchtype': 'title', 'query': ' foo title '}
     form = SimpleSearchForm(MultiDict(fields))
     is_valid = form.validate()
     self.assertTrue(is_valid, "Form should be valid.")
     # After validation the field data is expected without the padding.
     cleaned_query = form.query.data
     self.assertEqual(cleaned_query, 'foo title')
Beispiel #6
0
 def test_multiple_simple(self):
     """A searchtype/query form should be converted to a SimpleQuery."""
     fields = {"searchtype": "title", "query": "foo title"}
     form = SimpleSearchForm(MultiDict(fields))
     result = simple._query_from_form(form)
     self.assertIsInstance(
         result,
         SimpleQuery,
         "Should return an instance of SimpleQuery",
     )
Beispiel #7
0
 def test_form_data_has_order(self):
     """An ``order`` value in the form data should carry over to the query."""
     fields = {
         "searchtype": "title",
         "query": "foo title",
         "order": "submitted_date",
     }
     form = SimpleSearchForm(MultiDict(fields))
     result = simple._query_from_form(form)
     self.assertIsInstance(
         result,
         SimpleQuery,
         "Should return an instance of SimpleQuery",
     )
     # The sort order on the query is expected to match the submitted value.
     self.assertEqual(result.order, "submitted_date")
Beispiel #8
0
 def test_form_data_has_no_order(self):
     """An ``order`` of the literal string 'None' should yield no order."""
     fields = {
         'searchtype': 'title',
         'query': 'foo title',
         # The string 'None', not the ``None`` object.
         'order': 'None',
     }
     form = SimpleSearchForm(MultiDict(fields))
     result = simple._query_from_form(form)
     self.assertIsInstance(
         result,
         SimpleQuery,
         "Should return an instance of SimpleQuery",
     )
     self.assertIsNone(result.order, "Order should be None")
Beispiel #9
0
 def test_form_data_has_order(self):
     """A sort order supplied with the form should appear on the query."""
     fields = {
         'searchtype': 'title',
         'query': 'foo title',
         'order': 'submitted_date',
     }
     form = SimpleSearchForm(MultiDict(fields))
     result = simple._query_from_form(form)
     self.assertIsInstance(
         result,
         SimpleQuery,
         "Should return an instance of SimpleQuery",
     )
     # The sort order on the query is expected to match the submitted value.
     self.assertEqual(result.order, 'submitted_date')
Beispiel #10
0
 def test_request_includes_cookie_but_also_explicit_val(self, mock_simple):
     """An explicit request parameter should win over the cookie value."""
     # ``mock_simple`` is presumably injected by a patch decorator that is
     # not visible here; its ``search`` returns (data, status, headers).
     mock_simple.search.return_value = ({"form": SimpleSearchForm()}, 200, {})
     ui.PARAMS_TO_PERSIST = ["foo", "baz"]
     ui.PARAMS_COOKIE_NAME = "foo-cookie"

     # The cookie carries one value for "foo" ...
     cookie_payload = json.dumps({"foo": "ack"})
     self.client.set_cookie("", ui.PARAMS_COOKIE_NAME, cookie_payload)
     # ... while the request itself carries a different one.
     self.client.get("/?foo=oof")

     # Inspect the first positional argument handed to ``search``.
     passed_params = mock_simple.search.call_args[0][0]
     self.assertEqual(
         passed_params["foo"],
         "oof",
         "The explicit value should be used",
     )
Beispiel #11
0
 def test_request_includes_cookie(self, mock_simple):
     """A value stored in the params cookie should reach the controller."""
     # ``mock_simple`` is presumably injected by a patch decorator that is
     # not visible here; its ``search`` returns (data, status, headers).
     mock_simple.search.return_value = ({"form": SimpleSearchForm()}, 200, {})
     ui.PARAMS_TO_PERSIST = ["foo", "baz"]
     ui.PARAMS_COOKIE_NAME = "foo-cookie"

     # The cookie is the only source of a value for "foo" in this request.
     cookie_payload = json.dumps({"foo": "ack"})
     self.client.set_cookie("", ui.PARAMS_COOKIE_NAME, cookie_payload)
     self.client.get("/")

     # Inspect the first positional argument handed to ``search``.
     passed_params = mock_simple.search.call_args[0][0]
     self.assertEqual(
         passed_params["foo"],
         "ack",
         "The value in the cookie should be used",
     )
Beispiel #12
0
    def test_querystring_has_unbalanced_quotes(self):
        """Queries with an odd number of quote characters must not validate."""
        # (query string, whether the form is expected to validate), checked
        # in this order against one MultiDict that is mutated in place.
        cases = (
            ('"rhubarb', False),
            ('"rhubarb"', True),
            ('"rhubarb" "pie', False),
            ('"rhubarb" "pie"', True),
        )
        data = MultiDict({"searchtype": "title"})
        for query_text, should_validate in cases:
            data["query"] = query_text
            form = SimpleSearchForm(data)
            if should_validate:
                self.assertTrue(form.validate(), "Form should be valid")
            else:
                self.assertFalse(form.validate(), "Form should be invalid")
Beispiel #13
0
def search(request_params: MultiDict,
           archives: Optional[List[str]] = None) -> Response:
    """
    Perform a simple search.

    This supports requests from both the form-based view (provided here) AND
    from the mini search widget displayed on all arXiv.org pages.

    The search value is read from the ``query`` request parameter. If that
    value parses as an arXiv ID, the response is a redirect to the
    ``abs_by_id`` endpoint for that paper. If the ``searchtype`` is ``help``
    or ``full_text`` (and a query is present), the response is a redirect to
    the corresponding external search page. Otherwise the parameters are
    validated with :class:`.SimpleSearchForm` and, if valid, the search is
    executed.

    Parameters
    ----------
    request_params : :class:`.MultiDict`
        Parameters from the request. An immutable instance is copied into a
        mutable one before use; a mutable instance may be modified in place
        (the ``query`` value is rewritten when classic author syntax is
        detected).
    archives : list
        A list of archives within which the search should be performed.
        ``None`` means no archive restriction.

    Returns
    -------
    dict
        Search result response data. Empty for redirect responses.
    int
        HTTP status code.
    dict
        Headers to add to the response.

    Raises
    ------
    :class:`.NotFound`
        Raised when ``archives`` is an empty list.
    :class:`.BadRequest`
        Raised when the form has errors on ``order`` or ``size``, or when the
        requested results fall outside of the allowed range.
    :class:`.InternalServerError`
        Raised when there is a problem communicating with ES, or there was an
        unexpected problem executing the query.

    """
    # Imported locally so that this function does not depend on a
    # module-level import that may not exist in the file.
    from urllib.parse import quote_plus

    if archives is not None and len(archives) == 0:
        raise NotFound("No such archive")

    # We may need to intervene on the request parameters, so we'll
    # reinstantiate as a mutable MultiDict.
    if isinstance(request_params, ImmutableMultiDict):
        request_params = MultiDict(request_params.items(multi=True))

    logger.debug("simple search form")
    response_data = {}  # type: Dict[str, Any]

    logger.debug("simple search request")
    arxiv_id: Optional[str] = None
    if "query" in request_params:
        try:
            # First check whether the query is an arXiv ID.
            arxiv_id = identifier.parse_arxiv_id(request_params["query"])
        except ValueError:
            logger.debug("No arXiv ID detected; fall back to form")
        else:
            logger.debug("got arXiv ID: %s", arxiv_id)

    # If the query was an arXiv ID, redirect straight to that paper.
    if arxiv_id:
        headers = {"Location": url_for("abs_by_id", paper_id=arxiv_id)}
        return {}, HTTPStatus.MOVED_PERMANENTLY, headers

    # Here we intervene on the user's query to look for holdouts from the
    # classic search system's author indexing syntax (surname_f). We
    # rewrite with a comma, and show a warning to the user about the
    # change.
    response_data["has_classic_format"] = False
    if "searchtype" in request_params and "query" in request_params:
        if request_params["searchtype"] in ["author", "all"]:
            _query, _classic = catch_underscore_syntax(request_params["query"])
            response_data["has_classic_format"] = _classic
            request_params["query"] = _query

    # Fall back to form-based search.
    form = SimpleSearchForm(request_params)

    if form.query.data:
        # The query is user input; percent-encode it before placing it in a
        # redirect URL so that characters such as ``&``, ``#`` or whitespace
        # cannot break or alter the target query string.
        encoded_query = quote_plus(form.query.data)

        # Temporary workaround to support classic help search
        if form.searchtype.data == "help":
            return (
                {},
                HTTPStatus.MOVED_PERMANENTLY,
                {
                    "Location": f"/help/search?q={encoded_query}"
                },
            )

        # Support classic "experimental" search
        if form.searchtype.data == "full_text":
            return (
                {},
                HTTPStatus.MOVED_PERMANENTLY,
                {
                    "Location":
                    "http://search.arxiv.org:8081/"
                    f"?in=&query={encoded_query}"
                },
            )

    q: Optional[Query]
    if form.validate():
        logger.debug("form is valid")
        q = _query_from_form(form)

        if archives is not None:
            q = _update_with_archives(q, archives)

        # Pagination is handled outside of the form.
        q = paginate(q, request_params)

        # NOTE(review): the "[email protected]" text in the messages below
        # looks like an obfuscated e-mail address; confirm the intended
        # contact address. It is left unchanged here.
        try:
            # Execute the search. We'll use the results directly in
            #  template rendering, so they get added directly to the
            #  response content.
            response_data.update(SearchSession.search(q))  # type: ignore
        except index.IndexConnectionError as ex:
            # There was a (hopefully transient) connection problem. Either
            #  this will clear up relatively quickly (next request), or
            #  there is a more serious outage.
            logger.error("IndexConnectionError: %s", ex)
            raise InternalServerError(
                "There was a problem connecting to the search index. This is "
                "quite likely a transient issue, so please try your search "
                "again. If this problem persists, please report it to "
                "[email protected].") from ex
        except index.QueryError as ex:
            # Base exception routers should pick this up and show bug page.
            logger.error("QueryError: %s", ex)
            raise InternalServerError(
                "There was a problem executing your query. Please try your "
                "search again.  If this problem persists, please report it to "
                "[email protected].") from ex
        except index.OutsideAllowedRange as ex:
            raise BadRequest(
                "Hello clever friend. You can't get results in that range"
                " right now.") from ex

        except Exception as ex:
            # Log anything unexpected, then let it propagate unchanged.
            logger.error("Unhandled exception: %s", str(ex))
            raise
    else:
        logger.debug("form is invalid: %s", str(form.errors))
        if "order" in form.errors or "size" in form.errors:
            # It's likely that the user tried to set these parameters manually,
            # or that the search originated from somewhere else (and was
            # configured incorrectly).
            simple_url = url_for("ui.search")
            raise BadRequest(
                "It looks like there's something odd about your search"
                f" request. Please try <a href='{simple_url}'>starting"
                " over</a>.")
        # Other validation errors are reported through the form itself.
        q = None
    response_data["query"] = q
    response_data["form"] = form
    return response_data, HTTPStatus.OK, {}