def __init__(self, whoosh_index_dir, model=2, implicit_or=True, pval=None, frag_type=2, frag_size=2, frag_surround=40, host=None, port=0):
    """
    Open the Whoosh index and configure the underlying Whooshtrec engine.

    Args:
        whoosh_index_dir: Path of the index directory; handed to both
            open_dir() and the Whooshtrec engine.
        model: Retrieval model selector, forwarded to Whooshtrec (and to
            set_model() when pval is supplied).
        implicit_or: Forwarded unchanged to Whooshtrec.
        pval: Optional parameter value for the retrieval model. When it is
            not None, set_model(model, pval) is called on the engine.
        frag_type: Fragmenter type passed to the engine's set_fragmenter().
        frag_size: Value assigned to the engine's snippet_size attribute.
        frag_surround: 'surround' value passed to set_fragmenter().
        host: When not None, the engine is created with cache='engine' and
            this host/port pair; when None no cache arguments are passed.
        port: Port used together with host (ignored when host is None).
    """
    super(WhooshSearchInterface, self).__init__()
    log.debug("Whoosh Index to open: {0}".format(whoosh_index_dir))

    self.__index = open_dir(whoosh_index_dir)
    self.__reader = self.__index.reader()
    self.__redis_conn = None

    # Arguments common to both the cached and the uncached engine.
    engine_kwargs = {
        'whoosh_index_dir': whoosh_index_dir,
        'model': model,
        'implicit_or': implicit_or,
    }
    if host is not None:
        # A host was supplied: additionally request the engine-level cache.
        engine_kwargs.update(cache='engine', host=host, port=port)
    self.__engine = Whooshtrec(**engine_kwargs)

    # Update (2017-05-02) for snippet fragment tweaking.
    # SIGIR Study (2017) uses frag_type==1 (2 doesn't give sensible results), surround==40, snippet_sizes==2,0,1,4
    self.__engine.snippet_size = frag_size
    self.__engine.set_fragmenter(frag_type=frag_type, surround=frag_surround)

    # Compare against the None sentinel rather than relying on truthiness,
    # so that an explicit pval of 0 / 0.0 is not silently discarded.
    if pval is not None:
        self.__engine.set_model(model, pval)