def compose_ia_url(limit=None, page=1, subject=None, query=None, work_id=None,
                   _type=None, sorts=None, advanced=True):
    """Compose an archive.org search URL for Open Library works.

    This needs to be exposed by a generalized API endpoint within
    plugins/openlibrary/api/browse which lets lazy-load more items for
    the homepage carousel and support the upcoming /browse view
    (backed by archive.org search, so we don't have to send users to
    archive.org to see more books)

    Args:
        limit: number of rows to request; falls back to
            ``DEFAULT_IA_RESULTS`` when falsy (advanced URL only).
        page: page number passed through to the advanced search URL.
        subject: optional value appended as an ``openlibrary_subject:``
            clause.
        query: either a key of ``CAROUSELS_PRESETS`` (which is then
            expanded) or a raw query fragment that is AND-ed in.
        work_id: when given together with ``_type``, results are
            restricted to the same authors/subjects as this work and
            the work itself is excluded.
        _type: ``"authors"`` or ``"subjects"`` (matched
            case-insensitively); any other value, including ``None``,
            disables the ``work_id`` restriction.
        sorts: list of sort expressions such as ``"date desc"``.
        advanced: when true, build an ``advancedsearch.php`` JSON URL;
            otherwise build a human-facing ``search.php`` URL.

    Returns:
        The URL as a string, or ``''`` when a ``work_id`` restriction
        was requested but no author/subject clause could be built.
    """
    from openlibrary.plugins.openlibrary.home import CAROUSELS_PRESETS
    query = CAROUSELS_PRESETS.get(query, query)
    q = 'openlibrary_work:(*)'

    # If we don't provide an openlibrary_subject and no collection is
    # specified in our query, we restrict our query to the `inlibrary`
    # collection (i.e. those books which are borrowable)
    if (not subject) and (not query or 'collection:' not in query):
        q += ' AND collection:(inlibrary)'

    # In the only case where we are not restricting our search to
    # borrowable books (i.e. `inlibrary`), we remove all the books
    # which are `printdisabled` *outside* of `inlibrary`.
    if 'collection:(inlibrary)' not in q:
        q += ' AND (collection:(inlibrary) OR (!collection:(printdisabled)))'

    # If no lending restrictions (e.g. borrow, read) are imposed in
    # our query, we assume only borrowable books will be included in
    # results (not unrestricted/open books).
    if (not query) or ('loans__status__status:' not in query):
        q += ' AND loans__status__status:AVAILABLE'

    if query:
        q += " AND " + query
    if subject:
        q += " AND openlibrary_subject:" + subject

    # Normalise once: `_type` defaults to None (calling `.lower()` on it
    # would raise AttributeError), and the branches below must compare
    # against the same lower-cased value that passed the membership test.
    related_by = (_type or '').lower()
    if work_id and related_by in ("authors", "subjects"):
        _q = None
        works_authors_and_subjects = cached_work_authors_and_subjects(work_id)
        if works_authors_and_subjects:
            if related_by == "authors":
                authors = []
                for author_name in works_authors_and_subjects.get('authors', []):
                    # Search both "First Last" and "Last,First" forms.
                    authors.append(author_name)
                    authors.append(','.join(author_name.split(' ', 1)[::-1]))
                if authors:
                    _q = ' OR '.join('creator:"%s"' % author for author in authors)
            else:  # related_by == "subjects"
                subjects = works_authors_and_subjects.get('subjects', [])
                if subjects:
                    _q = ' OR '.join('subject:"%s"' % subj for subj in subjects)

        if not _q:
            logger.error('compose_ia_url failed!', extra={
                'limit': limit,
                'page': page,
                'subject': subject,
                'query': query,
                'work_id': work_id,
                '_type': _type,
                'sorts': sorts,
                'advanced': advanced,
            })
            return ''  # TODO: Should we just raise an exception instead?

        # Exclude the seed work itself from its own "related" results.
        q += ' AND (%s) AND !openlibrary_work:(%s)' % (_q, work_id.split('/')[-1])

    if not advanced:
        # search.php takes a single sort, with a leading '-' for descending.
        _sort = sorts[0] if sorts else ''
        if ' desc' in _sort:
            _sort = '-' + _sort.split(' desc')[0]
        elif ' asc' in _sort:
            _sort = _sort.split(' asc')[0]
        params = {'query': q}
        if _sort:
            params['sort'] = _sort
        return 'https://archive.org/search.php?' + urlencode(params)

    rows = limit or DEFAULT_IA_RESULTS
    params = [
        ('q', q),
        ('fl[]', 'identifier'),
        ('fl[]', 'openlibrary_edition'),
        ('fl[]', 'openlibrary_work'),
        ('fl[]', 'loans__status__status'),
        ('rows', rows),
        ('page', page),
        ('output', 'json'),
    ]
    # Always send at least one (possibly empty) sort[] parameter.
    if not sorts or not isinstance(sorts, list):
        sorts = ['']
    for sort in sorts:
        params.append(('sort[]', sort))
    base_url = "http://%s/advancedsearch.php" % config_bookreader_host
    return base_url + '?' + urlencode(params)
def run_solr_query(param=None, rows=100, page=1, sort=None, spellcheck_count=None,
                   offset=None, fields=None, facet=True):
    """Build a Solr select URL for a work search and execute it.

    Args:
        param: dict of search parameters. It is handed to
            ``build_q_list`` to produce the main query, and the keys
            ``public_scan``, ``print_disabled``, ``has_fulltext``,
            ``wt`` and the names in ``FACET_FIELDS`` are read here to
            add filter queries. NOTE: a non-empty dict passed by the
            caller is modified in place (``public_scan`` and
            ``print_disabled`` are popped, an invalid ``has_fulltext``
            is deleted).
        rows: number of results to request.
        page: 1-based page number; only used to derive ``offset`` when
            ``offset`` is None.
        sort: optional value for the Solr ``sort`` parameter.
        spellcheck_count: number of spellcheck suggestions; None means
            use ``default_spellcheck_count``, a falsy value disables
            spellcheck.
        offset: explicit start offset; overrides ``page``.
        fields: list of field names to return; a built-in default list
            is used when falsy.
        facet: whether to request facets for every ``FACET_FIELDS``
            entry.

    Returns:
        A ``(reply, url, q_list)`` tuple. ``reply`` is the body read
        from the Solr response, or None when ``execute_solr_query``
        returned None.
    """
    param = param or {}

    # use page when offset is not specified
    if offset is None:
        offset = rows * (page - 1)

    (q_list, use_dismax) = build_q_list(param)
    params = [
        ('fl', ','.join(fields or [
            'key', 'author_name', 'author_key',
            'title', 'subtitle', 'edition_count',
            'ia', 'has_fulltext', 'first_publish_year',
            'cover_i', 'cover_edition_key', 'public_scan_b',
            'lending_edition_s', 'lending_identifier_s', 'language',
            'ia_collection_s'
        ])),
        ('fq', 'type:work'),
        ('q.op', 'AND'),
        ('start', offset),
        ('rows', rows),
    ]

    if spellcheck_count is None:
        spellcheck_count = default_spellcheck_count

    if spellcheck_count:
        params.append(('spellcheck', 'true'))
        params.append(('spellcheck.count', spellcheck_count))

    if facet:
        params.append(('facet', 'true'))
        # Distinct loop name so the `facet` parameter is not shadowed.
        for facet_field in FACET_FIELDS:
            params.append(('facet.field', facet_field))

    if q_list:
        if use_dismax:
            params.append(('q', ' '.join(q_list)))
            params.append(('defType', 'dismax'))
            params.append(('qf', 'text title^5 author_name^5'))
            params.append(('bf', 'sqrt(edition_count)^10'))
        else:
            params.append(
                ('q', ' '.join(q_list + ['_val_:"sqrt(edition_count)"^10'])))

    if 'public_scan' in param:
        v = param.pop('public_scan').lower()
        if v in ('true', 'false'):
            if v == 'false':
                # also constrain on print disabled since the index may not be in sync
                param.setdefault('print_disabled', 'false')
            params.append(('fq', 'public_scan_b:%s' % v))

    if 'print_disabled' in param:
        v = param.pop('print_disabled').lower()
        if v in ('true', 'false'):
            minus = '-' if v == 'false' else ''
            params.append(('fq', '%ssubject_key:protected_daisy' % minus))

    if 'has_fulltext' in param:
        v = param['has_fulltext'].lower()
        if v in ('true', 'false'):
            params.append(('fq', 'has_fulltext:%s' % v))
        else:
            # Invalid value: drop it rather than sending an arbitrary,
            # unvalidated string to Solr as a filter query.
            del param['has_fulltext']

    for field in FACET_FIELDS:
        # has_fulltext was already handled (and validated) above.
        if field == 'has_fulltext':
            continue
        # The author facet is filtered through the author_key field.
        if field == 'author_facet':
            field = 'author_key'
        if field not in param:
            continue
        values = param[field]
        params += [('fq', '%s:"%s"' % (field, val)) for val in values if val]

    if sort:
        params.append(('sort', sort))

    params.append(('wt', param.get('wt', 'standard')))
    url = solr_select_url + '?' + urlencode(params)

    solr_result = execute_solr_query(url)
    if solr_result is None:
        return (None, url, q_list)
    reply = solr_result.read()
    return (reply, url, q_list)
def compose_ia_url(
    limit: Optional[int] = None,
    page: int = 1,
    subject=None,
    query=None,
    work_id=None,
    _type: Optional[Literal['authors', 'subjects']] = None,
    sorts=None,
    advanced=True,
) -> Optional[str]:
    """Compose an archive.org search URL for Open Library works.

    This needs to be exposed by a generalized API endpoint within
    plugins/api/browse which lets lazy-load more items for
    the homepage carousel and support the upcoming /browse view
    (backed by archive.org search, so we don't have to send users to
    archive.org to see more books)

    Args:
        limit: number of rows to request; falls back to
            ``DEFAULT_IA_RESULTS`` when falsy (advanced URL only).
        page: page number passed through to the advanced search URL.
        subject: optional value appended as an ``openlibrary_subject:``
            clause.
        query: either a key of ``CAROUSELS_PRESETS`` (which is then
            expanded) or a raw query fragment that is AND-ed in.
        work_id: when given together with ``_type``, results are
            restricted to the same authors/subjects as this work and
            the work itself is excluded.
        _type: ``"authors"`` or ``"subjects"``; any other value
            disables the ``work_id`` restriction.
        sorts: list of sort expressions such as ``"date desc"``.
        advanced: when true, build an ``advancedsearch.php`` JSON URL;
            otherwise build a human-facing ``search.php`` URL.

    Returns:
        The URL as a string. Returns None if we get an empty query,
        i.e. a ``work_id`` restriction was requested but the work has
        no authors/subjects to search for.
    """
    from openlibrary.plugins.openlibrary.home import CAROUSELS_PRESETS

    query = CAROUSELS_PRESETS.get(query, query)
    q = 'openlibrary_work:(*)'

    # If we don't provide an openlibrary_subject and no collection is
    # specified in our query, we restrict our query to the `inlibrary`
    # collection (i.e. those books which are borrowable)
    if (not subject) and (not query or 'collection:' not in query):
        q += ' AND collection:(inlibrary)'

    # In the only case where we are not restricting our search to
    # borrowable books (i.e. `inlibrary`), we remove all the books
    # which are `printdisabled` *outside* of `inlibrary`.
    if 'collection:(inlibrary)' not in q:
        q += ' AND (collection:(inlibrary) OR (!collection:(printdisabled)))'

    # If no lending restrictions (e.g. borrow, read) are imposed in
    # our query, we assume only borrowable books will be included in
    # results (not unrestricted/open books).
    lendable = (
        '(lending___available_to_browse:true OR lending___available_to_borrow:true)'
    )
    if (not query) or lendable not in query:
        q += ' AND ' + lendable

    if query:
        q += " AND " + query
    if subject:
        q += " AND openlibrary_subject:" + subject

    if work_id and _type in ("authors", "subjects"):
        # Fall back to an empty mapping so a failed/empty lookup yields
        # the documented None return instead of an AttributeError.
        works_authors_and_subjects = cached_work_authors_and_subjects(work_id) or {}
        if _type == "authors":
            authors = works_authors_and_subjects.get('authors', [])
            if not authors:
                return None
            # Search both "First Last" and "Last,First" forms of each name.
            name_variations = [
                variation
                for name in authors
                for variation in (name, ','.join(name.split(' ', 1)[::-1]))
            ]
            _q = ' OR '.join(f'creator:"{name}"' for name in name_variations)
        else:  # _type == "subjects"
            subjects = works_authors_and_subjects.get('subjects', [])
            if not subjects:
                return None
            _q = ' OR '.join(f'subject:"{subj}"' for subj in subjects)

        # Exclude the seed work itself from its own "related" results.
        seed_work_olid = work_id.split('/')[-1]
        q += f' AND ({_q}) AND !openlibrary_work:({seed_work_olid})'

    if not advanced:
        # search.php takes a single sort, with a leading '-' for descending.
        _sort = sorts[0] if sorts else ''
        if ' desc' in _sort:
            _sort = '-' + _sort.split(' desc')[0]
        elif ' asc' in _sort:
            _sort = _sort.split(' asc')[0]
        simple_params = {'query': q}
        if _sort:
            simple_params['sort'] = _sort
        return 'https://archive.org/search.php?' + urlencode(simple_params)

    rows = limit or DEFAULT_IA_RESULTS
    params = [
        ('q', q),
        ('fl[]', 'identifier'),
        ('fl[]', 'openlibrary_edition'),
        ('fl[]', 'openlibrary_work'),
        ('rows', rows),
        ('page', page),
        ('output', 'json'),
    ]
    # Always send at least one (possibly empty) sort[] parameter.
    if not sorts or not isinstance(sorts, list):
        sorts = ['']
    for sort in sorts:
        params.append(('sort[]', sort))
    base_url = "http://%s/advancedsearch.php" % config_bookreader_host
    return base_url + '?' + urlencode(params)