Beispiel #1
0
 def test_underscore_is_rewritten(self):
     """User searches for an author name with `surname_f` format."""
     query = "franklin_r"
     after, classic_name = catch_underscore_syntax(query)
     self.assertEqual(
         after,
         "franklin, r",
         "The underscore should be replaced with `, `.",
     )
     self.assertTrue(classic_name, "Should be identified as classic")
Beispiel #2
0
 def test_multiple_authors(self):
     """The user passes more than one name in classic format."""
     # E-gads.
     query = "franklin_r dole_b"
     after, classic_name = catch_underscore_syntax(query)
     self.assertEqual(
         after,
         "franklin, r; dole, b",
         "The underscore should be replaced with `, `.",
     )
     self.assertTrue(classic_name, "Should be identified as classic")
Beispiel #3
0
def search(request_params: MultiDict) -> Response:
    """
    Perform a search from the advanced search interface.

    This is intended to support ONLY form-based search, to replace the classic
    advanced search view.

    Parameters
    ----------
    request_params : dict

    Returns
    -------
    dict
        Response content.
    int
        HTTP status code.
    dict
        Extra headers to add to the response.

    Raises
    ------
    InternalServerError
        Raised when there is an unrecoverable error while interacting with the
        search index.

    """
    # We may need to intervene on the request parameters, so we'll
    # reinstantiate as a mutable MultiDict.
    if isinstance(request_params, ImmutableMultiDict):
        request_params = MultiDict(request_params.items(multi=True))

    logger.debug('search request from advanced form')
    response_data: Dict[str, Any] = {}
    response_data['show_form'] = ('advanced' not in request_params)
    logger.debug('show_form: %s', str(response_data['show_form']))

    # Here we intervene on the user's query to look for holdouts from
    # the classic search system's author indexing syntax (surname_f). We
    # rewrite with a comma, and show a warning to the user about the
    # change.
    has_classic = False
    for key, value in request_params.items():
        if value is None:
            continue
        match = TERM_FIELD_PTN.search(key)
        if match is None:
            continue
        value = str(value)
        i = match.group(1)
        field = request_params.get(f'terms-{i}-field')
        # We are only looking for this syntax in the author search, or
        # in an all-fields search.
        if field not in ['all', 'author']:
            continue

        value, _has_classic = catch_underscore_syntax(value)
        has_classic = _has_classic if not has_classic else has_classic
        request_params.setlist(key, [value])

    response_data['has_classic_format'] = has_classic
    form = forms.AdvancedSearchForm(request_params)
    q: Optional[Query]
    # We want to avoid attempting to validate if no query has been entered.
    #  If a query was actually submitted via the form, 'advanced' will be
    #  present in the request parameters.
    if 'advanced' in request_params:

        if form.validate():
            logger.debug('form is valid')
            q = _query_from_form(form)

            # Pagination is handled outside of the form.
            q = paginate(q, request_params)

            try:
                # Execute the search. We'll use the results directly in
                #  template rendering, so they get added directly to the
                #  response content. asdict(
                response_data.update(SearchSession.search(q))  # type: ignore
            except index.IndexConnectionError as e:
                # There was a (hopefully transient) connection problem. Either
                #  this will clear up relatively quickly (next request), or
                #  there is a more serious outage.
                logger.error('IndexConnectionError: %s', e)
                raise InternalServerError(
                    "There was a problem connecting to the search index. This "
                    "is quite likely a transient issue, so please try your "
                    "search again. If this problem persists, please report it "
                    "to [email protected].") from e
            except index.QueryError as e:
                # Base exception routers should pick this up and show bug page.
                logger.error('QueryError: %s', e)
                raise InternalServerError(
                    "There was a problem executing your query. Please try "
                    "your search again.  If this problem persists, please "
                    "report it to [email protected].") from e
            except index.OutsideAllowedRange as e:
                raise BadRequest(
                    "Hello clever friend. You can't get results in that range"
                    " right now.") from e
            response_data['query'] = q
        else:
            logger.debug('form is invalid: %s', str(form.errors))
            if 'order' in form.errors or 'size' in form.errors:
                # It's likely that the user tried to set these parameters
                # manually, or that the search originated from somewhere else
                # (and was configured incorrectly).
                advanced_url = url_for('ui.advanced_search')
                raise BadRequest(
                    f"It looks like there's something odd about your search"
                    f" request. Please try <a href='{advanced_url}'>starting"
                    f" over</a>.")

            # Force the form to be displayed, so that we can render errors.
            #  This has most likely occurred due to someone manually crafting
            #  a GET response, but it could be something else.
            response_data['show_form'] = True

    # We want the form handy even when it is not shown to the user. For
    #  example, we can generate new form-friendly requests to update sort
    #  order and page size by embedding the form (hidden).
    response_data['form'] = form
    return response_data, status.HTTP_200_OK, {}
Beispiel #4
0
def search(request_params: MultiDict,
           archives: Optional[List[str]] = None) -> Response:
    """
    Perform a simple search.

    This supports requests from both the form-based view (provided here) AND
    from the mini search widget displayed on all arXiv.org pages.

    At a minimum, expects the parameter ``value`` in the GET request. This may
    be a match value for a search query, or an arXiv ID.

    Parameters
    ----------
    request_params : :class:`.MultiDict`
    archives : list
        A list of archives within which the search should be performed.

    Returns
    -------
    dict
        Search result response data.
    int
        HTTP status code.
    dict
        Headers to add to the response.

    Raises
    ------
    :class:`.InternalServerError`
        Raised when there is a problem communicating with ES, or there was an
        unexpected problem executing the query.

    """
    if archives is not None and len(archives) == 0:
        raise NotFound('No such archive')

    # We may need to intervene on the request parameters, so we'll
    # reinstantiate as a mutable MultiDict.
    if isinstance(request_params, ImmutableMultiDict):
        request_params = MultiDict(request_params.items(multi=True))

    logger.debug('simple search form')
    response_data = {}  # type: Dict[str, Any]

    logger.debug('simple search request')
    if 'query' in request_params:
        try:
            # first check if the URL includes an arXiv ID
            arxiv_id: Optional[str] = identifier.parse_arxiv_id(
                request_params['query'])
            # If so, redirect.
            logger.debug(f"got arXiv ID: {arxiv_id}")
        except ValueError as e:
            logger.debug('No arXiv ID detected; fall back to form')
            arxiv_id = None
    else:
        arxiv_id = None

    if arxiv_id:
        headers = {'Location': url_for('abs_by_id', paper_id=arxiv_id)}
        return {}, status.HTTP_301_MOVED_PERMANENTLY, headers

    # Here we intervene on the user's query to look for holdouts from the
    # classic search system's author indexing syntax (surname_f). We
    # rewrite with a comma, and show a warning to the user about the
    # change.
    response_data['has_classic_format'] = False
    if 'searchtype' in request_params and 'query' in request_params:
        if request_params['searchtype'] in ['author', 'all']:
            _query, _classic = catch_underscore_syntax(request_params['query'])
            response_data['has_classic_format'] = _classic
            request_params['query'] = _query

    # Fall back to form-based search.
    form = SimpleSearchForm(request_params)

    if form.query.data:
        # Temporary workaround to support classic help search
        if form.searchtype.data == 'help':
            return {}, status.HTTP_301_MOVED_PERMANENTLY,\
                {'Location': f'/help/search?q={form.query.data}'}

        # Support classic "expeirmental" search
        elif form.searchtype.data == 'full_text':
            return {}, status.HTTP_301_MOVED_PERMANENTLY,\
                {'Location': 'http://search.arxiv.org:8081/'
                             f'?in=&query={form.query.data}'}

    q: Optional[Query]
    if form.validate():
        logger.debug('form is valid')
        q = _query_from_form(form)

        if archives is not None:
            q = _update_with_archives(q, archives)

        # Pagination is handled outside of the form.
        q = paginate(q, request_params)

        try:
            # Execute the search. We'll use the results directly in
            #  template rendering, so they get added directly to the
            #  response content.
            response_data.update(asdict(index.search(q)))
        except index.IndexConnectionError as e:
            # There was a (hopefully transient) connection problem. Either
            #  this will clear up relatively quickly (next request), or
            #  there is a more serious outage.
            logger.error('IndexConnectionError: %s', e)
            raise InternalServerError(
                "There was a problem connecting to the search index. This is "
                "quite likely a transient issue, so please try your search "
                "again. If this problem persists, please report it to "
                "[email protected].") from e
        except index.QueryError as e:
            # Base exception routers should pick this up and show bug page.
            logger.error('QueryError: %s', e)
            raise InternalServerError(
                "There was a problem executing your query. Please try your "
                "search again.  If this problem persists, please report it to "
                "[email protected].") from e
        except index.OutsideAllowedRange as e:
            raise BadRequest(
                "Hello clever friend. You can't get results in that range"
                " right now.") from e

        except Exception as e:
            logger.error('Unhandled exception: %s', str(e))
            raise
    else:
        logger.debug('form is invalid: %s', str(form.errors))
        if 'order' in form.errors or 'size' in form.errors:
            # It's likely that the user tried to set these parameters manually,
            # or that the search originated from somewhere else (and was
            # configured incorrectly).
            simple_url = url_for('ui.search')
            raise BadRequest(
                f"It looks like there's something odd about your search"
                f" request. Please try <a href='{simple_url}'>starting"
                f" over</a>.")
        q = None
    response_data['query'] = q
    response_data['form'] = form
    return response_data, status.HTTP_200_OK, {}
Beispiel #5
0
 def test_nonsense_input(self):
     """Garbage input is passed."""
     try:
         catch_underscore_syntax("")
     except Exception as ex:
         self.fail(ex)
Beispiel #6
0
 def test_false_positive(self):
     """The underscore is followed by more than one character."""
     query = "not_aname"
     after, classic_name = catch_underscore_syntax(query)
     self.assertEqual(query, after, "The query should not be rewritten")
     self.assertFalse(classic_name, "Should not be identified as classic")