Example #1
 def q(searcher, q, **options):
     options = {key.partition('.')[-1]: options[key] for key in options if key.startswith('q.')}
     field = options.pop('field', [])
     fields = [field] if isinstance(field, basestring) else field
     fields = [name.partition('^')[::2] for name in fields]
     if any(boost for name, boost in fields):
         field = {name: float(boost or 1.0) for name, boost in fields}
     elif isinstance(field, basestring):
         (field, boost), = fields
     else:
         field = [name for name, boost in fields] or ''
     if 'type' in options:
         with HTTPError(httplib.BAD_REQUEST, AttributeError):
             return getattr(engine.Query, options['type'])(field, q)
     for key in set(options) - {'op', 'version'}:
         with HTTPError(httplib.BAD_REQUEST, ValueError):
             options[key] = json.loads(options[key])
     if q is not None:
         with HTTPError(httplib.BAD_REQUEST, lucene.JavaError):
             return searcher.parse(q, field=field, **options)
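
The helper above strips the 'q.' prefix from parser options, resolves any field boosts, and either builds a typed query directly or defers to the searcher's parser. A minimal sketch of calling it outside the request cycle, assuming a lupyne engine.IndexSearcher over a hypothetical index directory and purely illustrative field names:

from lupyne import engine

searcher = engine.IndexSearcher('/tmp/index')  # hypothetical index location
# a boosted field list becomes {'title': 2.0, 'body': 1.0} for the parser
query = q(searcher, 'hello world', **{'q.field': ['title^2', 'body']})
# 'q.type' skips parsing and builds the query directly, e.g. a prefix query
prefix = q(searcher, 'hel', **{'q.type': 'prefix', 'q.field': 'title'})
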
Example #2
 def search(self, q=None, count=None, start=0, fields=None, sort=None, facets='', group='', hl='', mlt=None, spellcheck=0, timeout=None, **options):
     """Run query and return documents.
     
     **GET** /search?
         Return array of document objects and total doc count.
         
         &q=\ *chars*\ &q.type=[term|prefix|wildcard]&q.\ *chars*\ =...,
             query, optional type to skip parsing, and optional parser settings: q.field, q.op,...
         
         &filter=\ *chars*
             | cached filter applied to the query
             | if a previously cached filter is not found, the value will be parsed as a query
         
         &count=\ *int*\ &start=0
             maximum number of docs to return and offset to start at
         
         &fields=\ *chars*,... &fields.multi=\ *chars*,... &fields.indexed=\ *chars*\ [:*chars*],...
             only include selected stored fields; multi-valued fields returned in an array; indexed fields with optional type are cached
         
         &sort=\ [-]\ *chars*\ [:*chars*],... &sort.scores[=max]
             | field name, optional type, minus sign indicates descending
             | optionally score docs, additionally compute maximum score
         
         &facets=\ *chars*,... &facets.count=\ *int*\ &facets.min=0
             | include facet counts for given field names; facets filters are cached
             | optional maximum number of most populated facet values per field, and minimum count to return
         
         &group=\ *chars*\ [:*chars*]&group.count=1
             | group documents by field value with optional type, up to given maximum count
         
         .. versionchanged:: 1.6 grouping searches use count and start options
         
         &hl=\ *chars*,... &hl.count=1&hl.tag=strong&hl.enable=[fields|terms]
             | stored fields to return highlighted
             | optional maximum fragment count and html tag name
             | optionally enable matching any field or any term
         
         &mlt=\ *int*\ &mlt.fields=\ *chars*,... &mlt.\ *chars*\ =...,
             | doc index (or id without a query) to find MoreLikeThis
             | optional document fields to match
             | optional MoreLikeThis settings: mlt.minTermFreq, mlt.minDocFreq,...
         
         &spellcheck=\ *int*
             | maximum number of spelling corrections to return for each query term, grouped by field
             | original query is still run; use q.spellcheck=true to affect query parsing
         
         &timeout=\ *number*
             timeout search after elapsed number of seconds
         
         :return:
             | {
             | "query": *string*\|null,
             | "count": *int*\|null,
             | "maxscore": *number*\|null,
             | "docs": [{"__id__": *int*, "__score__": *number*, "__keys__": *array*,
                 "__highlights__": {*string*: *array*,... }, *string*: *value*,... },... ],
             | "facets": {*string*: {*string*: *int*,... },... },
             | "groups": [{"count": *int*, "value": *value*, "docs": [*object*,... ]},... ]
             | "spellcheck": {*string*: {*string*: [*string*,... ],... },... },
             | }
     """
     searcher = self.searcher
     if sort is not None:
         sort = (re.match(r'(-?)(\w+):?(\w*)', field).groups() for field in sort)
         sort = [(name, (type or 'string'), (reverse == '-')) for reverse, name, type in sort]
         with HTTPError(httplib.BAD_REQUEST, AttributeError):
             sort = [searcher.sorter(name, type, reverse=reverse) for name, type, reverse in sort]
     q = parse.q(searcher, q, **options)
     qfilter = options.pop('filter', None)
     if qfilter is not None and qfilter not in searcher.filters:
         searcher.filters[qfilter] = engine.Query.filter(parse.q(searcher, qfilter, **options))
     qfilter = searcher.filters.get(qfilter)
     if mlt is not None:
         if q is not None:
             mlt, = searcher.search(q, count=mlt + 1, sort=sort)[mlt:].ids
         mltfields = options.pop('mlt.fields', ())
         with HTTPError(httplib.BAD_REQUEST, ValueError):
             attrs = {key.partition('.')[-1]: json.loads(options[key]) for key in options if key.startswith('mlt.')}
         q = searcher.morelikethis(mlt, *mltfields, analyzer=searcher.analyzer, **attrs)
     if count is not None:
         count += start
     if count == 0:
         start = count = 1
     scores = options.get('sort.scores')
     gcount = options.get('group.count', 1)
     scores = {'scores': scores is not None, 'maxscore': scores == 'max'}
     if ':' in group or group in searcher.sorters:
         hits = searcher.search(q, filter=qfilter, sort=sort, timeout=timeout, **scores)
         with HTTPError(httplib.BAD_REQUEST, AttributeError):
             groups = hits.groupby(searcher.comparator(*group.split(':')).__getitem__, count=count, docs=gcount)
         groups.groupdocs = groups.groupdocs[start:]
     elif group:
         scores = {'includeScores': scores['scores'], 'includeMaxScore': scores['maxscore']}
         groups = searcher.groupby(group, q, qfilter, count, start, sort=sort, groupDocsLimit=gcount, **scores)
     else:
         hits = searcher.search(q, filter=qfilter, sort=sort, count=count, timeout=timeout, **scores)
         groups = engine.documents.Groups(searcher, [hits[start:]], hits.count, hits.maxscore)
     result = {'query': q and unicode(q), 'count': groups.count, 'maxscore': groups.maxscore}
     tag, enable = options.get('hl.tag', 'strong'), options.get('hl.enable', '')
     hlcount = options.get('hl.count', 1)
     if hl:
         hl = {name: searcher.highlighter(q, name, terms='terms' in enable, fields='fields' in enable, tag=tag) for name in hl}
     fields, multi, indexed = parse.fields(searcher, fields, **options)
     if fields is None:
         fields = {}
     else:
         groups.select(*itertools.chain(fields, multi))
     result['groups'] = []
     for hits in groups:
         docs = []
         for hit in hits:
             doc = hit.dict(*multi, **fields)
             doc.update((name, indexed[name][hit.id]) for name in indexed)
             fragments = (hl[name].fragments(hit.id, hlcount) for name in hl)  # pragma: no branch
             if hl:
                 doc['__highlights__'] = {name: value for name, value in zip(hl, fragments) if value is not None}
             docs.append(doc)
         result['groups'].append({'docs': docs, 'count': hits.count, 'value': getattr(hits, 'value', None)})
     if not group:
         result['docs'] = result.pop('groups')[0]['docs']
     q = q or engine.Query.alldocs()
     if facets:
         facets = (tuple(facet.split(':')) if ':' in facet else facet for facet in facets)
         facets = result['facets'] = searcher.facets(q, *facets)
         for counts in facets.values():
             counts.pop(None, None)
         if 'facets.min' in options:
             for name, counts in facets.items():
                 facets[name] = {term: count for term, count in counts.items() if count >= options['facets.min']}
         if 'facets.count' in options:
             for name, counts in facets.items():
                 facets[name] = {term: counts[term] for term in heapq.nlargest(options['facets.count'], counts, key=counts.__getitem__)}
     if spellcheck:
         terms = result['spellcheck'] = collections.defaultdict(dict)
         for name, value in engine.Query.terms(q):
             terms[name][value] = list(itertools.islice(searcher.correct(name, value), spellcheck))
     return result
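
The docstring above doubles as the HTTP contract for GET /search. A hedged client-side sketch of exercising a few of the documented parameters, assuming a lupyne.server instance listening on CherryPy's default localhost:8080 and an illustrative field name:

import json
import urllib
import urllib2

params = urllib.urlencode({
    'q': 'hello world',   # query string to be parsed
    'q.field': 'title',   # default parser field (illustrative name)
    'count': 10,          # maximum docs to return
    'facets': 'author',   # cached facet counts for this field
    'hl': 'title',        # stored field to return highlighted
})
response = urllib2.urlopen('http://localhost:8080/search?' + params)
result = json.loads(response.read())
print result['count'], [doc['__id__'] for doc in result['docs']]

The response body follows the :return: structure documented above, so docs, facets, and spellcheck suggestions all arrive in one JSON object.
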
Example #3
        mount(root, path, config, autoupdate)
    cherrypy.quickstart(cherrypy.tree.apps.get(path), path, config)

parser = argparse.ArgumentParser(description='Restful json cherrypy server.', prog='lupyne.server')
parser.add_argument('directories', nargs='*', metavar='directory', help='index directories')
parser.add_argument('-r', '--read-only', action='store_true', help='expose only read methods; no write lock')
parser.add_argument('-c', '--config', help='optional configuration file or json object of global params')
parser.add_argument('-p', '--pidfile', metavar='FILE', help='store the process id in the given file')
parser.add_argument('-d', '--daemonize', action='store_true', help='run the server as a daemon')
parser.add_argument('--autoreload', type=float, metavar='SECONDS', help='automatically reload modules; replacement for engine.autoreload')
parser.add_argument('--autoupdate', type=float, metavar='SECONDS', help='automatically update index version and commit any changes')
parser.add_argument('--autosync', metavar='HOST{:PORT}{/PATH},...', help='automatically synchronize searcher with remote hosts and update')
parser.add_argument('--real-time', action='store_true', help='search in real-time without committing')

if __name__ == '__main__':
    args = parser.parse_args()
    read_only = args.read_only or args.autosync or len(args.directories) > 1
    kwargs = {'nrt': True} if args.real_time else {}
    if read_only and (args.real_time or not args.directories):
        parser.error('incompatible read/write options')
    if args.autosync:
        kwargs['hosts'] = args.autosync.split(',')
        if not (args.autoupdate and len(args.directories) == 1):
            parser.error('autosync requires autoupdate and a single directory')
    if args.config and not os.path.exists(args.config):
        args.config = {'global': json.loads(args.config)}
    cls = WebSearcher if read_only else WebIndexer
    root = cls.new(*map(os.path.abspath, args.directories), **kwargs)
    del args.directories, args.read_only, args.autosync, args.real_time
    start(root, callback=init, **args.__dict__)
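
The __main__ block mainly validates flag combinations before handing off to start(). A small sketch of those checks, reusing the parser defined above with hypothetical paths and hosts:

# autosync implies a read-only searcher and requires autoupdate plus a single directory
args = parser.parse_args(['/tmp/index', '--autosync', 'other.host:8080', '--autoupdate', '60'])
assert args.autosync and args.autoupdate == 60.0 and not args.real_time

# a single writable index may instead search in near real-time without committing
args = parser.parse_args(['/tmp/index', '--real-time'])
assert args.real_time and not args.read_only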