def q(searcher, q, **options):
    """Return query parsed from request parameters, applying q.* parser settings."""
    options = {key.partition('.')[-1]: options[key] for key in options if key.startswith('q.')}
    field = options.pop('field', [])
    fields = [field] if isinstance(field, basestring) else field
    fields = [name.partition('^')[::2] for name in fields]
    if any(boost for name, boost in fields):
        field = {name: float(boost or 1.0) for name, boost in fields}
    elif isinstance(field, basestring):
        (field, boost), = fields
    else:
        field = [name for name, boost in fields] or ''
    if 'type' in options:
        with HTTPError(httplib.BAD_REQUEST, AttributeError):
            return getattr(engine.Query, options['type'])(field, q)
    for key in set(options) - {'op', 'version'}:
        with HTTPError(httplib.BAD_REQUEST, ValueError):
            options[key] = json.loads(options[key])
    if q is not None:
        with HTTPError(httplib.BAD_REQUEST, lucene.JavaError):
            return searcher.parse(q, field=field, **options)
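
# Minimal sketch (illustrative only, not part of the module): the "name^boost" syntax
# handled above, e.g. ?q.field=title^2&q.field=body, maps field names to float boosts,
# defaulting to 1.0 when no boost is given.
def _example_field_boosts(names):  # hypothetical helper for demonstration
    pairs = [name.partition('^')[::2] for name in names]
    return {name: float(boost or 1.0) for name, boost in pairs}

assert _example_field_boosts(['title^2', 'body']) == {'title': 2.0, 'body': 1.0}
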
def search(self, q=None, count=None, start=0, fields=None, sort=None, facets='', group='', hl='', mlt=None, spellcheck=0, timeout=None, **options):
    """Run query and return documents.

    **GET** /search?
        Return array of document objects and total doc count.

        &q=\ *chars*\ &q.type=[term|prefix|wildcard]&q.\ *chars*\ =...,
            query, optional type to skip parsing, and optional parser settings: q.field, q.op,...

        &filter=\ *chars*
            | cached filter applied to the query
            | if a previously cached filter is not found, the value will be parsed as a query

        &count=\ *int*\ &start=0
            maximum number of docs to return and offset to start at

        &fields=\ *chars*,... &fields.multi=\ *chars*,... &fields.indexed=\ *chars*\ [:*chars*],...
            only include selected stored fields; multi-valued fields returned in an array; indexed fields with optional type are cached

        &sort=\ [-]\ *chars*\ [:*chars*],... &sort.scores[=max]
            | field name, optional type, minus sign indicates descending
            | optionally score docs, additionally compute maximum score

        &facets=\ *chars*,... &facets.count=\ *int*\ &facets.min=0
            | include facet counts for given field names; facets filters are cached
            | optional maximum number of most populated facet values per field, and minimum count to return

        &group=\ *chars*\ [:*chars*]&group.count=1
            | group documents by field value with optional type, up to given maximum count

            .. versionchanged:: 1.6 grouping searches use count and start options

        &hl=\ *chars*,... &hl.count=1&hl.tag=strong&hl.enable=[fields|terms]
            | stored fields to return highlighted
            | optional maximum fragment count and html tag name
            | optionally enable matching any field or any term

        &mlt=\ *int*\ &mlt.fields=\ *chars*,... &mlt.\ *chars*\ =...,
            | doc index (or id without a query) to find MoreLikeThis
            | optional document fields to match
            | optional MoreLikeThis settings: mlt.minTermFreq, mlt.minDocFreq,...

        &spellcheck=\ *int*
            | maximum number of spelling corrections to return for each query term, grouped by field
            | original query is still run; use q.spellcheck=true to affect query parsing

        &timeout=\ *number*
            timeout search after elapsed number of seconds

        :return:
            | {
            | "query": *string*\|null,
            | "count": *int*\|null,
            | "maxscore": *number*\|null,
            | "docs": [{"__id__": *int*, "__score__": *number*, "__keys__": *array*, "__highlights__": {*string*: *array*,... }, *string*: *value*,... },... ],
            | "facets": {*string*: {*string*: *int*,... },... },
            | "groups": [{"count": *int*, "value": *value*, "docs": [*object*,... ]},... ],
            | "spellcheck": {*string*: {*string*: [*string*,... ],... },... },
            | }
    """
    searcher = self.searcher
    # parse sort specs of the form [-]name[:type] into sorters
    if sort is not None:
        sort = (re.match(r'(-?)(\w+):?(\w*)', field).groups() for field in sort)
        sort = [(name, (type or 'string'), (reverse == '-')) for reverse, name, type in sort]
        with HTTPError(httplib.BAD_REQUEST, AttributeError):
            sort = [searcher.sorter(name, type, reverse=reverse) for name, type, reverse in sort]
    q = parse.q(searcher, q, **options)
    # parse and cache the filter on first use
    qfilter = options.pop('filter', None)
    if qfilter is not None and qfilter not in searcher.filters:
        searcher.filters[qfilter] = engine.Query.filter(parse.q(searcher, qfilter, **options))
    qfilter = searcher.filters.get(qfilter)
    # MoreLikeThis: resolve the doc by hit index of the query, or use mlt directly as an id
    if mlt is not None:
        if q is not None:
            mlt, = searcher.search(q, count=mlt + 1, sort=sort)[mlt:].ids
        mltfields = options.pop('mlt.fields', ())
        with HTTPError(httplib.BAD_REQUEST, ValueError):
            attrs = {key.partition('.')[-1]: json.loads(options[key]) for key in options if key.startswith('mlt.')}
        q = searcher.morelikethis(mlt, *mltfields, analyzer=searcher.analyzer, **attrs)
    if count is not None:
        count += start
    if count == 0:
        start = count = 1
    scores = options.get('sort.scores')
    gcount = options.get('group.count', 1)
    scores = {'scores': scores is not None, 'maxscore': scores == 'max'}
    # group by field comparator, by native grouping search, or run a plain search
    if ':' in group or group in searcher.sorters:
        hits = searcher.search(q, filter=qfilter, sort=sort, timeout=timeout, **scores)
        with HTTPError(httplib.BAD_REQUEST, AttributeError):
            groups = hits.groupby(searcher.comparator(*group.split(':')).__getitem__, count=count, docs=gcount)
        groups.groupdocs = groups.groupdocs[start:]
    elif group:
        scores = {'includeScores': scores['scores'], 'includeMaxScore': scores['maxscore']}
        groups = searcher.groupby(group, q, qfilter, count, start, sort=sort, groupDocsLimit=gcount, **scores)
    else:
        hits = searcher.search(q, filter=qfilter, sort=sort, count=count, timeout=timeout, **scores)
        groups = engine.documents.Groups(searcher, [hits[start:]], hits.count, hits.maxscore)
    result = {'query': q and unicode(q), 'count': groups.count, 'maxscore': groups.maxscore}
    tag, enable = options.get('hl.tag', 'strong'), options.get('hl.enable', '')
    hlcount = options.get('hl.count', 1)
    if hl:
        hl = {name: searcher.highlighter(q, name, terms='terms' in enable, fields='fields' in enable, tag=tag) for name in hl}
    # select stored fields; attach cached indexed values and highlights per hit
    fields, multi, indexed = parse.fields(searcher, fields, **options)
    if fields is None:
        fields = {}
    else:
        groups.select(*itertools.chain(fields, multi))
    result['groups'] = []
    for hits in groups:
        docs = []
        for hit in hits:
            doc = hit.dict(*multi, **fields)
            doc.update((name, indexed[name][hit.id]) for name in indexed)
            fragments = (hl[name].fragments(hit.id, hlcount) for name in hl)  # pragma: no branch
            if hl:
                doc['__highlights__'] = {name: value for name, value in zip(hl, fragments) if value is not None}
            docs.append(doc)
        result['groups'].append({'docs': docs, 'count': hits.count, 'value': getattr(hits, 'value', None)})
    if not group:
        result['docs'] = result.pop('groups')[0]['docs']
    q = q or engine.Query.alldocs()
    # facet counts, optionally pruned by minimum count and most populated values
    if facets:
        facets = (tuple(facet.split(':')) if ':' in facet else facet for facet in facets)
        facets = result['facets'] = searcher.facets(q, *facets)
        for counts in facets.values():
            counts.pop(None, None)
        if 'facets.min' in options:
            for name, counts in facets.items():
                facets[name] = {term: count for term, count in counts.items() if count >= options['facets.min']}
        if 'facets.count' in options:
            for name, counts in facets.items():
                facets[name] = {term: counts[term] for term in heapq.nlargest(options['facets.count'], counts, key=counts.__getitem__)}
    # spelling corrections per query term, grouped by field
    if spellcheck:
        terms = result['spellcheck'] = collections.defaultdict(dict)
        for name, value in engine.Query.terms(q):
            terms[name][value] = list(itertools.islice(searcher.correct(name, value), spellcheck))
    return result
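
# Minimal client sketch (illustrative only; assumes a server on localhost:8080 and
# index fields named 'text', 'date', and 'author'): querying the /search endpoint
# documented above with a parsed query, sorting, and facet counts.
def _example_search_request(host='localhost:8080'):  # hypothetical helper, unused by the server
    import json, urllib, urllib2
    params = urllib.urlencode({
        'q': 'hello world', 'q.field': 'text',  # query parsed against an assumed 'text' field
        'count': 10, 'start': 0,                # first page of 10 docs
        'sort': '-date:long',                   # descending sort on an assumed numeric 'date' field
        'facets': 'author',                     # facet counts for an assumed 'author' field
    }, doseq=True)
    result = json.load(urllib2.urlopen('http://%s/search?%s' % (host, params)))
    return result['count'], result['docs'], result['facets']
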
    mount(root, path, config, autoupdate)
    cherrypy.quickstart(cherrypy.tree.apps.get(path), path, config)


parser = argparse.ArgumentParser(description='Restful json cherrypy server.', prog='lupyne.server')
parser.add_argument('directories', nargs='*', metavar='directory', help='index directories')
parser.add_argument('-r', '--read-only', action='store_true', help='expose only read methods; no write lock')
parser.add_argument('-c', '--config', help='optional configuration file or json object of global params')
parser.add_argument('-p', '--pidfile', metavar='FILE', help='store the process id in the given file')
parser.add_argument('-d', '--daemonize', action='store_true', help='run the server as a daemon')
parser.add_argument('--autoreload', type=float, metavar='SECONDS', help='automatically reload modules; replacement for engine.autoreload')
parser.add_argument('--autoupdate', type=float, metavar='SECONDS', help='automatically update index version and commit any changes')
parser.add_argument('--autosync', metavar='HOST{:PORT}{/PATH},...', help='automatically synchronize searcher with remote hosts and update')
parser.add_argument('--real-time', action='store_true', help='search in real-time without committing')

if __name__ == '__main__':
    args = parser.parse_args()
    read_only = args.read_only or args.autosync or len(args.directories) > 1
    kwargs = {'nrt': True} if args.real_time else {}
    if read_only and (args.real_time or not args.directories):
        parser.error('incompatible read/write options')
    if args.autosync:
        kwargs['hosts'] = args.autosync.split(',')
        if not (args.autoupdate and len(args.directories) == 1):
            parser.error('autosync requires autoupdate and a single directory')
    if args.config and not os.path.exists(args.config):
        args.config = {'global': json.loads(args.config)}
    cls = WebSearcher if read_only else WebIndexer
    root = cls.new(*map(os.path.abspath, args.directories), **kwargs)
    del args.directories, args.read_only, args.autosync, args.real_time
    start(root, callback=init, **args.__dict__)
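
# Example invocation (assumed index path and port; illustrative only):
#   python -m lupyne.server /path/to/index --autoupdate 60 -c '{"server.socket_port": 8080}'
# starts a writable server on one index, committing any changes every 60 seconds,
# with the json object applied as cherrypy global configuration.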