Esempio n. 1
0
def compose_ia_url(limit=None, page=1, subject=None, query=None, work_id=None,
                   _type=None, sorts=None, advanced=True):
    """Build an archive.org search URL for borrowable Open Library books.

    This needs to be exposed by a generalized API endpoint within
    plugins/openlibrary/api/browse which lets lazy-load more items for
    the homepage carousel and support the upcoming /browse view
    (backed by archive.org search, so we don't have to send users to
    archive.org to see more books)

    :param limit: number of rows to request (advanced search only); falls
        back to ``DEFAULT_IA_RESULTS`` when falsy.
    :param page: page number passed through to advanced search.
    :param subject: if given, appended as an ``openlibrary_subject:`` clause.
    :param query: either a key of ``CAROUSELS_PRESETS`` (replaced by the
        preset value) or a raw query fragment that is AND-ed to the query.
    :param work_id: if given together with ``_type``, results are restricted
        to items sharing authors/subjects with this work, excluding the work
        itself.
    :param _type: ``"authors"`` or ``"subjects"`` (case-insensitive); any
        other value, including ``None``, disables the ``work_id`` clause.
    :param sorts: list of sort expressions; only the first one is used when
        ``advanced`` is false.
    :param advanced: when true, return an ``advancedsearch.php`` JSON URL on
        ``config_bookreader_host``; otherwise a ``search.php`` URL on
        archive.org.
    :return: the URL as a string, or ``''`` when a ``work_id`` clause was
        requested but no authors/subjects could be found for the work.
    """
    from openlibrary.plugins.openlibrary.home import CAROUSELS_PRESETS
    query = CAROUSELS_PRESETS.get(query, query)
    q = 'openlibrary_work:(*)'

    # If we don't provide an openlibrary_subject and no collection is
    # specified in our query, we restrict our query to the `inlibrary`
    # collection (i.e. those books which are borrowable)
    if (not subject) and (not query or 'collection:' not in query):
        q += ' AND collection:(inlibrary)'
    # In the only case where we are not restricting our search to
    # borrowable books (i.e. `inlibrary`), we remove all the books
    # which are `printdisabled` *outside* of `inlibrary`.
    if 'collection:(inlibrary)' not in q:
        q += ' AND (collection:(inlibrary) OR (!collection:(printdisabled)))'

    # If no lending restrictions (e.g. borrow, read) are imposed in
    # our query, we assume only borrowable books will be included in
    # results (not unrestricted/open books).
    if (not query) or ('loans__status__status:' not in query):
        q += ' AND loans__status__status:AVAILABLE'
    if query:
        q += " AND " + query
    if subject:
        q += " AND openlibrary_subject:" + subject

    # Normalise once so that the gate and the branch selection below agree
    # on case, and so that a missing `_type` skips the clause instead of
    # raising AttributeError on `None.lower()`.
    related_type = _type.lower() if _type else ''
    if work_id and related_type in ("authors", "subjects"):
        _q = None
        works_authors_and_subjects = cached_work_authors_and_subjects(work_id)
        if works_authors_and_subjects:
            if related_type == "authors":
                authors = []
                for author_name in works_authors_and_subjects.get('authors', []):
                    # Search both "First Last" and "Last,First" spellings.
                    authors.append(author_name)
                    authors.append(','.join(author_name.split(' ', 1)[::-1]))
                if authors:
                    _q = ' OR '.join('creator:"%s"' % author for author in authors)
            else:  # related_type == "subjects"
                subjects = works_authors_and_subjects.get('subjects', [])
                if subjects:
                    _q = ' OR '.join('subject:"%s"' % name for name in subjects)
        if not _q:
            logger.error('compose_ia_url failed!', extra={
                'limit': limit,
                'page': page,
                'subject': subject,
                'query': query,
                'work_id': work_id,
                '_type': _type,
                'sorts': sorts,
                'advanced': advanced,
            })
            return ''  # TODO: Should we just raise an exception instead?
        # Exclude the seed work itself from its own "related" results.
        q += ' AND (%s) AND !openlibrary_work:(%s)' % (_q, work_id.split('/')[-1])

    if not advanced:
        # search.php takes a single sort key, with a leading '-' meaning
        # descending, instead of the "field desc"/"field asc" form.
        _sort = sorts[0] if sorts else ''
        if ' desc' in _sort:
            _sort = '-' + _sort.split(' desc')[0]
        elif ' asc' in _sort:
            _sort = _sort.split(' asc')[0]
        params = {'query': q}
        if _sort:
            params['sort'] = _sort
        return 'https://archive.org/search.php?' + urlencode(params)

    rows = limit or DEFAULT_IA_RESULTS
    params = [
        ('q', q),
        ('fl[]', 'identifier'),
        ('fl[]', 'openlibrary_edition'),
        ('fl[]', 'openlibrary_work'),
        ('fl[]', 'loans__status__status'),
        ('rows', rows),
        ('page', page),
        ('output', 'json'),
    ]
    # Always send at least one (possibly empty) sort[] parameter.
    if not sorts or not isinstance(sorts, list):
        sorts = ['']
    for sort in sorts:
        params.append(('sort[]', sort))
    base_url = "http://%s/advancedsearch.php" % config_bookreader_host
    return base_url + '?' + urlencode(params)
Esempio n. 2
0
def run_solr_query(param=None,
                   rows=100,
                   page=1,
                   sort=None,
                   spellcheck_count=None,
                   offset=None,
                   fields=None,
                   facet=True):
    """Build a Solr select URL for a work search and execute it.

    Note that ``param`` is modified in place: ``public_scan`` and
    ``print_disabled`` are popped, ``print_disabled`` may be defaulted to
    ``'false'``, and an invalid ``has_fulltext`` value is removed.

    :param param: dict of search parameters; treated as empty when falsy.
    :param rows: number of rows to request.
    :param page: 1-based page number, used to derive ``offset`` when
        ``offset`` is None.
    :param sort: optional value for the Solr ``sort`` parameter.
    :param spellcheck_count: number of spellcheck suggestions; defaults to
        ``default_spellcheck_count`` when None, and spellcheck is not
        requested when the resulting value is falsy.
    :param offset: explicit start offset; overrides ``page`` when given.
    :param fields: list of field names to return; a built-in default list
        is used when falsy.
    :param facet: when true, request facets for every ``FACET_FIELDS`` entry.
    :return: ``(reply, url, q_list)`` where ``reply`` is the raw response
        body read from ``execute_solr_query(url)``, or None if that call
        returned None.
    """
    param = param or {}

    # use page when offset is not specified
    if offset is None:
        offset = rows * (page - 1)

    (q_list, use_dismax) = build_q_list(param)
    params = [
        ('fl', ','.join(fields or [
            'key', 'author_name', 'author_key', 'title', 'subtitle',
            'edition_count', 'ia', 'has_fulltext', 'first_publish_year',
            'cover_i', 'cover_edition_key', 'public_scan_b',
            'lending_edition_s', 'lending_identifier_s', 'language',
            'ia_collection_s'
        ])),
        ('fq', 'type:work'),
        ('q.op', 'AND'),
        ('start', offset),
        ('rows', rows),
    ]

    if spellcheck_count is None:
        spellcheck_count = default_spellcheck_count

    if spellcheck_count:
        params.append(('spellcheck', 'true'))
        params.append(('spellcheck.count', spellcheck_count))

    if facet:
        params.append(('facet', 'true'))
        # Distinct loop name so the `facet` argument is not shadowed.
        for facet_field in FACET_FIELDS:
            params.append(('facet.field', facet_field))

    if q_list:
        if use_dismax:
            params.append(('q', ' '.join(q_list)))
            params.append(('defType', 'dismax'))
            params.append(('qf', 'text title^5 author_name^5'))
            params.append(('bf', 'sqrt(edition_count)^10'))
        else:
            # Boost by edition count via an inline function query.
            params.append(
                ('q', ' '.join(q_list + ['_val_:"sqrt(edition_count)"^10'])))

    if 'public_scan' in param:
        v = param.pop('public_scan').lower()
        if v in ('true', 'false'):
            if v == 'false':
                # also constrain on print disabled since the index may not be in sync
                param.setdefault('print_disabled', 'false')
            params.append(('fq', 'public_scan_b:%s' % v))

    if 'print_disabled' in param:
        v = param.pop('print_disabled').lower()
        if v in ('true', 'false'):
            minus = '-' if v == 'false' else ''
            params.append(('fq', '%ssubject_key:protected_daisy' % minus))

    if 'has_fulltext' in param:
        v = param['has_fulltext'].lower()
        if v in ('true', 'false'):
            params.append(('fq', 'has_fulltext:%s' % v))
        else:
            # Discard an invalid value instead of forwarding it to Solr as
            # a filter query.
            del param['has_fulltext']

    for field in FACET_FIELDS:
        # has_fulltext was already handled above.
        if field == 'has_fulltext':
            continue
        # The author facet is filtered through the author_key field.
        if field == 'author_facet':
            field = 'author_key'
        if field not in param:
            continue
        values = param[field]
        params += [('fq', '%s:"%s"' % (field, val)) for val in values if val]

    if sort:
        params.append(('sort', sort))

    params.append(('wt', param.get('wt', 'standard')))
    url = solr_select_url + '?' + urlencode(params)

    solr_result = execute_solr_query(url)
    if solr_result is None:
        return (None, url, q_list)
    reply = solr_result.read()
    return (reply, url, q_list)
Esempio n. 3
0
def compose_ia_url(
    limit: Optional[int] = None,
    page: int = 1,
    subject=None,
    query=None,
    work_id=None,
    _type: Optional[Literal['authors', 'subjects']] = None,
    sorts=None,
    advanced=True,
) -> Optional[str]:
    """Build an archive.org search URL for lendable Open Library books.

    This needs to be exposed by a generalized API endpoint within
    plugins/api/browse which lets lazy-load more items for
    the homepage carousel and support the upcoming /browse view
    (backed by archive.org search, so we don't have to send users to
    archive.org to see more books)

    :param limit: number of rows to request (advanced search only); falls
        back to ``DEFAULT_IA_RESULTS`` when falsy.
    :param page: page number passed through to advanced search.
    :param subject: if given, appended as an ``openlibrary_subject:`` clause.
    :param query: either a key of ``CAROUSELS_PRESETS`` (replaced by the
        preset value) or a raw query fragment that is AND-ed to the query.
    :param work_id: if given together with ``_type``, results are restricted
        to items sharing authors/subjects with this work, excluding the work
        itself.
    :param _type: which attribute of the work to match on; any value other
        than ``"authors"`` or ``"subjects"`` disables the ``work_id`` clause.
    :param sorts: list of sort expressions; only the first one is used when
        ``advanced`` is false.
    :param advanced: when true, return an ``advancedsearch.php`` JSON URL on
        ``config_bookreader_host``; otherwise a ``search.php`` URL on
        archive.org.
    :return: the URL, or None if we get an empty query (no authors or
        subjects are known for ``work_id``).
    """
    from openlibrary.plugins.openlibrary.home import CAROUSELS_PRESETS

    query = CAROUSELS_PRESETS.get(query, query)
    q = 'openlibrary_work:(*)'

    # If we don't provide an openlibrary_subject and no collection is
    # specified in our query, we restrict our query to the `inlibrary`
    # collection (i.e. those books which are borrowable)
    if (not subject) and (not query or 'collection:' not in query):
        q += ' AND collection:(inlibrary)'
    # In the only case where we are not restricting our search to
    # borrowable books (i.e. `inlibrary`), we remove all the books
    # which are `printdisabled` *outside* of `inlibrary`.
    if 'collection:(inlibrary)' not in q:
        q += ' AND (collection:(inlibrary) OR (!collection:(printdisabled)))'

    # If no lending restrictions (e.g. borrow, read) are imposed in
    # our query, we assume only borrowable books will be included in
    # results (not unrestricted/open books).
    lendable = (
        '(lending___available_to_browse:true OR lending___available_to_borrow:true)'
    )
    if (not query) or lendable not in query:
        q += ' AND ' + lendable
    if query:
        q += " AND " + query
    if subject:
        q += " AND openlibrary_subject:" + subject

    if work_id and _type in ("authors", "subjects"):
        # Treat a falsy lookup result (e.g. None) like "nothing known" so
        # we return None below rather than failing on `.get`.
        works_authors_and_subjects = cached_work_authors_and_subjects(work_id) or {}
        if _type == "authors":
            authors = works_authors_and_subjects.get('authors', [])
            if not authors:
                return None
            # Search both "First Last" and "Last,First" spellings.
            name_variations = [
                variation for name in authors
                for variation in (name, ','.join(name.split(' ', 1)[::-1]))
            ]
            _q = ' OR '.join(f'creator:"{name}"' for name in name_variations)
        else:  # _type == "subjects"
            subjects = works_authors_and_subjects.get('subjects', [])
            if not subjects:
                return None
            _q = ' OR '.join(f'subject:"{name}"' for name in subjects)
        # Exclude the seed work itself from its own "related" results.
        q += ' AND ({}) AND !openlibrary_work:({})'.format(
            _q,
            work_id.split('/')[-1])

    if not advanced:
        # search.php takes a single sort key, with a leading '-' meaning
        # descending, instead of the "field desc"/"field asc" form.
        _sort = sorts[0] if sorts else ''
        if ' desc' in _sort:
            _sort = '-' + _sort.split(' desc')[0]
        elif ' asc' in _sort:
            _sort = _sort.split(' asc')[0]
        simple_params = {'query': q}
        if _sort:
            simple_params['sort'] = _sort
        return 'https://archive.org/search.php?' + urlencode(simple_params)

    rows = limit or DEFAULT_IA_RESULTS
    params = [
        ('q', q),
        ('fl[]', 'identifier'),
        ('fl[]', 'openlibrary_edition'),
        ('fl[]', 'openlibrary_work'),
        ('rows', rows),
        ('page', page),
        ('output', 'json'),
    ]
    # Always send at least one (possibly empty) sort[] parameter.
    if not sorts or not isinstance(sorts, list):
        sorts = ['']
    for sort in sorts:
        params.append(('sort[]', sort))
    base_url = "http://%s/advancedsearch.php" % config_bookreader_host
    return base_url + '?' + urlencode(params)