def parse_query(self, query, *fields, group="AND"):
    """Parse *query* against *fields* of this object's index.

    :param query: raw query string entered by the user
    :param fields: field names to search across
    :param group: "OR" for best-match OR-grouping, anything else for the
        parser's default AND grouping
    :return: a parsed whoosh query object
    """
    if group == "OR":
        field_parser = qparser.MultifieldParser(
            fields, schema=self.index.schema, group=qparser.OrGroup)
    else:
        field_parser = qparser.MultifieldParser(
            fields, schema=self.index.schema)
    return field_parser.parse(query)
def render_results(s, qs, template):
    """Search with searcher *s* for query string *qs* and render *template*.

    Searches the "tgrams" and "title" fields, sorted by title, and passes the
    results plus a highlighting callback to the template.
    NOTE(review): relies on module globals SOURCEDIR, DateParserPlugin,
    render_template and request — confirm they are imported at file level.
    """
    #qp = qparser.QueryParser("content", s.schema)
    qp = qparser.MultifieldParser(["tgrams", "title"], s.schema)
    # Add the DateParserPlugin to the parser
    qp.add_plugin(DateParserPlugin())
    q = qp.parse(qs)
    results = s.search(q, limit=100, sortedby="title")
    #results = s.search(q, limit=100, sortedby="title", reverse=True)
    #results = s.search(q, limit=100, groupedby="chapter")
    # Keep the (possibly rewritten) query object actually used by the search.
    q = results.q
    #hf = highlight.HtmlFormatter()
    #results.highlighter = highlight.Highlighter(formatter=hf)
    # qc: spelling-corrected query — not computed here, template expects it.
    qc = None

    def hilite(hit):
        # Re-read the source document from disk to highlight matched terms,
        # since the "content" field is apparently not stored in the index.
        with open(SOURCEDIR + hit["path"], "rb") as hitfile:
            text = hitfile.read().decode("utf-8")
        return hit.highlights("content", text)

    return render_template(template, qs=qs, q=q, results=results,
                           hilite=hilite, corrected=qc, args=request.args)
def search_content(self, search_statement: str):
    """Full-text search over indexed messages.

    Detects the language of *search_statement* (Turkish vs. English) and
    searches the matching pair of fields with fuzzy term matching.

    :param search_statement: raw user query
    :return: list of dicts with docnum, message_id, chat_id, message and
        meta_content for every hit
    """
    cur_lang = "en"
    try:
        # langdetect raises on empty/ambiguous input; best-effort fallback
        # to English in that case.  (Unused `as e` binding removed.)
        if langdetect.detect(search_statement) == "tr":
            cur_lang = "tr"
    except Exception:
        pass
    # Turkish content is indexed in parallel *_tr fields.
    search_fields = (["message", "meta_content"] if cur_lang == "en"
                     else ["message_tr", "meta_content_tr"])
    with self.__index.searcher() as searcher:
        query = qparser.MultifieldParser(
            search_fields, self.__schema,
            termclass=CustomFuzzyTerm).parse(search_statement)
        results = searcher.search(query)
        return [{
            "_docnum": r.docnum,
            "message_id": r["message_id"],
            "chat_id": r["chat_id"],
            "message": r["message"],
            "meta_content": r["meta_content"]
        } for r in results]
def post_search(pn=1, size=10):
    """Paged keyword search over the 'posts' index, rendered as HTML."""
    kw = request.values.get('kw')
    if kw is None:
        # No keyword parameter supplied — show the error page.
        return render_template('search/list.html', title='搜索',
                               message='搜索关键字不能为空!')
    schema = whoosh_searcher.get_index('posts').schema
    # q = query.Or([query.Term('title', keyword), query.Term('content', keyword)])
    query_obj = qparser.MultifieldParser(['title', 'content'], schema).parse(kw)
    with whoosh_searcher.get_searcher('posts') as searcher:
        hits = searcher.search_page(query_obj, pagenum=pn, pagelen=size,
                                    sortedby=sorting.ScoreFacet())
        records = [hit.fields() for hit in hits.results]
        page = models.Page(pn, size, result=records,
                           has_more=hits.pagecount > pn,
                           total_page=hits.pagecount, total=hits.total)
        print(page.result)
        # return jsonify(page)
        return render_template('search/list.html', title=kw + '搜索结果',
                               page=page, kw=kw)
def finalresult():
    """Handle a QA search POST and render the top-5 question/answer matches."""
    if request.method == 'POST':
        # search query
        query = request.form['QA']
        print(query)
        ix = index.open_dir("qadata_Index")
        schema = ix.schema
        # Create query parser that looks through designated fields in index
        og = qparser.OrGroup.factory(0.9)
        mp = qparser.MultifieldParser(['question', 'answer'], schema, group=og)
        # This is the user query
        q = mp.parse(query)
        # Actual searcher, prints top hits
        with ix.searcher(weighting=scoring.BM25F()) as s:
            results = s.search(q, limit=5)
            # BUG FIX: the original did `for i in range(5)` and indexed
            # results[i] unconditionally, raising IndexError whenever fewer
            # than five documents matched. Iterate only over actual hits.
            for hit in results:
                print(hit['question'], str(hit.score), hit['answer'])
            # NOTE(review): the template call below still assumes exactly
            # five hits; queries with fewer matches need template changes
            # before this can be fully robust.
            return render_template(
                "result.html", searchquery=query,
                Q1=results[0]['question'], A1=results[0]['answer'],
                Q2=results[1]['question'], A2=results[1]['answer'],
                Q3=results[2]['question'], A3=results[2]['answer'],
                Q4=results[3]['question'], A4=results[3]['answer'],
                Q5=results[4]['question'], A5=results[4]['answer'])
def Rprecision(queries, ground_truth, top_5_table):
    """Compute R-precision per query for each (Analyzer, Scoring) pair.

    For every analyzer/scoring combination in *top_5_table*, each query in
    *queries* is run with a result limit equal to the size of its ground-truth
    set; R-precision is the fraction of retrieved ids that are relevant.

    :param queries: mapping query_id -> query string
    :param ground_truth: mapping query_id -> collection of relevant doc ids
    :param top_5_table: pair of parallel sequences (analyzer names, scoring
        function names)
    :return: dict "<Analyzer>_<Scoring>" -> {query_id: r-precision}
    """
    final_result_dict = {}
    for Analyzer, Scoring_Function in zip(top_5_table[0], top_5_table[1]):
        ix = index.open_dir(path + Analyzer + "Analyzer")
        scoring_function = getattr(scoring, Scoring_Function)()
        analyzer_result = {}
        for q_id in queries:
            query = queries[q_id]
            max_number_of_results = len(ground_truth[q_id])
            # Search every field except the document id itself.
            fields = ix.schema.names()
            fields.remove('id')
            qp = qparser.MultifieldParser(fields, ix.schema)
            parsed_query = qp.parse(query)
            # BUG FIX: a searcher was opened per query but only the last one
            # was closed (searcher.close() sat outside the loop) — use a
            # context manager so each searcher is released.
            with ix.searcher(weighting=scoring_function) as searcher:
                results = searcher.search(parsed_query,
                                          limit=max_number_of_results)
                # BUG FIX: the original tested membership in the undefined
                # name `gt` instead of the `ground_truth` parameter.
                relevant = sum(1 for result in results
                               if int(result["id"]) in ground_truth[q_id])
            analyzer_result[q_id] = relevant / len(ground_truth[q_id])
        final_result_dict["_".join([Analyzer, Scoring_Function])] = analyzer_result
    return final_result_dict
def CreateQueryParser():
    """Initialise the module-level parser `qp` over Title/ParaText.

    Uses best-match OR grouping (factory 0.9) and enables fuzzy term syntax.
    """
    global qp
    best_match_or = qparser.OrGroup.factory(0.9)
    qp = qparser.MultifieldParser(["Title", "ParaText"],
                                  schema=ix.schema, group=best_match_or)
    qp.add_plugin(FuzzyTermPlugin())
def search(self, phrase, content=False, username=None, ct=10, page=0):
    """Implement the search, returning a list of bookmarks.

    :param phrase: query string
    :param content: also search the readable page content field
    :param username: restrict results to this user's bookmarks
    :param ct: page length
    :param page: 0-based page number (whoosh pages are 1-based internally)
    """
    page = int(page) + 1
    with WIX.searcher() as search:
        fields = ['description', 'extended', 'tags']
        if content:
            fields.append('readable')
        parser = qparser.MultifieldParser(fields,
                                          schema=WIX.schema,
                                          group=qparser.OrGroup)
        qry = parser.parse(phrase)
        # BUG FIX: the original wrapped this call in
        # `except ValueError, exc: raise(exc)` — Python-2-only syntax that
        # caught the exception only to re-raise it unchanged. The dead
        # handler is removed; a ValueError still propagates to the caller.
        res = search.search_page(qry, page, pagelen=int(ct))
        if res:
            # Map hit ids back to Bmark rows, eager-loading the hash.
            qry = Bmark.query.filter(
                Bmark.bid.in_([r['bid'] for r in res])
            )
            if username:
                qry = qry.filter(Bmark.username == username)
            qry = qry.options(joinedload('hashed'))
            return qry.all()
        else:
            return []
def search_index(search_model, query, fields=None, limit=None):
    """Search *search_model*'s whoosh index for *query*.

    :param search_model: object exposing get_path() and a `fields` list
    :param query: raw query string; '+' and '|' are mapped to AND/OR
    :param fields: field names to search (defaults to search_model.fields)
    :param limit: max hits (defaults to settings.DJOOSH_SEARCH_LIMIT or 100)
    :return: whoosh results, or [] on any parse/search failure
    """
    ix = index.open_dir(search_model.get_path())
    # BUG FIX: mutable default argument `fields=[]` replaced with None
    # (behaviour unchanged: falsy values fall back to search_model.fields).
    fields = fields or search_model.fields
    hits = []
    query = smart_unicode(query)
    limit = limit or getattr(settings, 'DJOOSH_SEARCH_LIMIT', 100)
    if query and fields:
        query = query.replace('+', ' AND ').replace('|', ' OR ')
        parser = qparser.MultifieldParser(fields, schema=ix.schema)
        # BUG FIX: the original parsed the query once *before* the
        # try/except, so a malformed query raised before the guard could
        # catch it; the redundant unguarded parse is removed.
        try:
            qry = parser.parse(query)
        except Exception:
            qry = None
        if qry:
            searcher = ix.searcher()
            try:
                hits = searcher.search(qry, limit=limit)
            except Exception:
                hits = []
    ix.close()
    return hits
def post_search(pn=1, size=10):
    """Paged keyword search over the 'posts' index.

    :param pn: 1-based page number
    :param size: page length
    :return: rendered search result page (or an error page when no keyword)
    """
    keyword = request.values.get('kw')
    if keyword is None:
        return render_template('search/list.html', title='搜索',
                               message='搜索关键字不能为空')
    with whoosh_searcher.get_searcher('posts') as searcher:
        parser = qparser.MultifieldParser(
            ['title', 'content'], whoosh_searcher.get_index('posts').schema)
        # BUG FIX: original called parser.parser(keyword) — QueryParser has
        # no `parser` method (AttributeError); the correct call is parse().
        q = parser.parse(keyword)
        result = searcher.search_page(q, pagenum=pn, pagelen=size,
                                      sortedby=sorting.ScoreFacet())
        result_list = [x.fields() for x in result.results]
        page = models.Page(page_num=pn, per_page=size,
                           result_list=result_list,
                           has_more=result.pagecount > pn,
                           total_page=result.pagecount, total=result.total)
        return render_template('search/list.html', title=keyword + '搜索结果',
                               page=page, kw=keyword)
def test_missing_field_scoring():
    """Field lengths must stay correct when a later document omits a field."""
    schema = fields.Schema(name=fields.TEXT(stored=True),
                           hobbies=fields.TEXT(stored=True))
    storage = RamStorage()
    ix = storage.create_index(schema)
    writer = ix.writer()
    writer.add_document(name=u('Frank'), hobbies=u('baseball, basketball'))
    writer.commit()
    r = ix.reader()
    # One doc: two hobby terms, one name term.
    assert_equal(r.field_length("hobbies"), 2)
    assert_equal(r.field_length("name"), 1)
    r.close()
    # Second document has no "hobbies" value at all.
    writer = ix.writer()
    writer.add_document(name=u('Jonny'))
    writer.commit()
    with ix.searcher() as s:
        r = s.reader()
        # Both commits should have merged into a single segment.
        assert_equal(len(ix._segments()), 1)
        assert_equal(r.field_length("hobbies"), 2)
        assert_equal(r.field_length("name"), 2)
        # Searching across both fields must still score/retrieve correctly.
        parser = qparser.MultifieldParser(['name', 'hobbies'], schema)
        q = parser.parse(u("baseball"))
        result = s.search(q)
        assert_equal(len(result), 1)
def __init__(self, index=None, search_fields=["title", "content"],
             html_formatter=None, parser=None, termclass=Term):
    """Searcher over several fields of a whoosh index.

    (Docstring translated from Spanish.)

    :param index: whoosh.index.Index instance; opened from INDEX_DIR when
        omitted
    :param search_fields: list of field names to search
    :param html_formatter: whoosh.highlight.HtmlFormatter instance used to
        format hits
    :param parser: pre-built query parser; a MultifieldParser over
        search_fields is created when omitted
    :param termclass: term class passed to the parser (e.g. fuzzy terms)

    NOTE(review): `search_fields` uses a mutable default list; it is only
    read here, never mutated, but callers should not rely on its identity.
    """
    self.index = index or open_dir(INDEX_DIR)
    self.html_formatter = html_formatter or HtmlFormatter(
        between="...", tagname="strong", classname="search-match",
        termclass="search-term")
    self.search_fields = search_fields
    self.termclass = termclass
    self.parser = parser or qparser.MultifieldParser(
        self.search_fields, self.index.schema, termclass=termclass)
def search(self, query_str, limit=20):
    """Search the configured columns for *query_str*.

    :param query_str: raw query string
    :param limit: maximum number of hits
    :return: list of stored-field dicts, one per hit
    """
    parsed = qparser.MultifieldParser(
        self.setting.searchable_columns,
        schema=self.get_schema(),
    ).parse(query_str)
    with self.get_index().searcher() as searcher:
        hits = searcher.search(parsed, limit=limit)
        return [hit.fields() for hit in hits]
def parse(filt):
    # Build a whoosh query object from a Filter (Python 2 code: uses
    # `unicode`). NOTE(review): `schema` and `cls` come from an enclosing
    # scope not visible here — confirm against the surrounding class.
    if filt.query_type == Filter.Q_APPROX:
        # Approximate match: let the multifield parser interpret the string.
        mp = qparser.MultifieldParser(filt.get_fields(), schema=schema)
        return mp.parse(unicode(filt.query_string))
    elif filt.query_type == Filter.Q_EXACT:
        # Exact match: scan all stored documents for a field whose stored
        # value equals the query string, then OR their ids together.
        # NOTE(review): the searcher `s` is never closed — potential leak.
        s = cls.get_index().searcher()
        qs = filt.query_string
        f = lambda d: qs in [ d.get(field) for field in filt.get_fields() ]
        ids = [unicode(d['id']) for d in filter(f, s.documents())]
        return query.Or([query.Term('id', iden) for iden in ids])
def search(self, string=None, fields=["title", "content"]):
    """Fuzzy best-match search for *string* over *fields*; yields raw hits.

    The phrase plugin is swapped for sequence+fuzzy plugins so quoted input
    is treated as an ordered sequence of (possibly fuzzy) terms.
    """
    qp = qparser.MultifieldParser(fields, self.ix.schema,
                                  group=qparser.OrGroup)
    qp.remove_plugin_class(qparser.PhrasePlugin)
    qp.add_plugin(qparser.FuzzyTermPlugin())
    qp.add_plugin(qparser.SequencePlugin())
    # Wrap the input in quotes so the SequencePlugin handles it.
    parsed = qp.parse(u'"{}"'.format(string))
    with self.ix.searcher(weighting=scoring.BM25F) as searcher:
        for hit in searcher.search(parsed, limit=None):
            yield hit
def search_query(request, response_format='html'):
    """Account view: run a search and render the matching Objects.

    Supports a "tags:" prefix for tag lookups; otherwise dispatches to the
    configured search engine ('whoosh' or 'db').
    """
    objects = []
    query = request.GET.get('q', '')
    if query:
        if query[:5] == 'tags:':
            tag_names = query[5:].strip().split(',')
            tags = Tag.objects.filter(name__in=tag_names)
            objects = Object.objects.filter(tags__in=tags)
        else:
            search_engine = getattr(settings, 'SEARCH_ENGINE', 'whoosh')
            if search_engine == 'whoosh':
                ix = index.open_dir(settings.WHOOSH_INDEX)
                # Whoosh doesn't understand '+' or '-' but we can replace
                # them with 'AND' and 'NOT'.
                # BUG FIX: spaces must be rewritten *first* — the original
                # ran .replace(' ', ' OR ') last, which mangled the
                # ' AND '/' OR ' operators it had just inserted.
                squery = query.replace(' ', ' OR ').replace(
                    '+', ' AND ').replace('|', ' OR ')
                parser = qparser.MultifieldParser(
                    ["name", "url", "type", "content"], schema=ix.schema)
                # BUG FIX: the original parsed once outside the try block
                # (defeating the guard) and again inside; parse only inside.
                try:
                    qry = parser.parse(squery)
                except Exception:
                    # don't show the user weird errors only because we don't
                    # understand the query.
                    # parser.parse("") would return None
                    qry = None
                if qry:
                    searcher = ix.searcher()
                    try:
                        hits = searcher.search(qry, limit=100)
                    except Exception:
                        hits = []
                    hit_ids = [hit['id'] for hit in hits]
                    objects = Object.objects.filter(pk__in=hit_ids)
            elif search_engine == 'db':
                objects = dbsearch.search(query)
            else:
                raise RuntimeError(
                    'Unknown Search engine: {0!s}'.format(search_engine))
    return render_to_response('core/search/query_view', {
        'query': query,
        'objects': objects
    }, context_instance=RequestContext(request),
        response_format=response_format)
def _mk_parser(self):
    # Build and cache (on self.parser) a query parser covering every field
    # in the index schema, with fuzzy/range/quote syntax enabled.
    from whoosh import qparser as qparse
    parser = qparse.MultifieldParser(self.idx_obj.schema.names(),
                                     self.idx_obj.schema)
    # XXX: plugin is broken in Debian's whoosh 2.7.0-2, but already fixed
    # upstream
    parser.add_plugin(qparse.FuzzyTermPlugin())
    parser.add_plugin(qparse.GtLtPlugin())
    parser.add_plugin(qparse.SingleQuotePlugin())
    # replace field definition to allow for colons to be part of a field's name:
    parser.replace_plugin(
        qparse.FieldsPlugin(expr=r"(?P<text>[()<>.\w]+|[*]):"))
    self.parser = parser
def index_search(dirname, search_fields, search_query):
    """Open the index at *dirname* and print the top-10 hits for the query.

    Uses best-match OR grouping (factory 0.9) over *search_fields*.
    """
    idx = index.open_dir(dirname)
    # Query parser spanning the requested fields with scored OR grouping.
    parser = qparser.MultifieldParser(search_fields, idx.schema,
                                      group=qparser.OrGroup.factory(0.9))
    parsed = parser.parse(search_query)
    with idx.searcher() as searcher:
        hits = searcher.search(parsed, limit=10)
        print("Search Results: ")
        pprint(hits[0:10])
def index_search(dirname, search_fields, search_query):
    """Search the index at *dirname* and return hit paths as a DataFrame.

    :param dirname: directory containing the whoosh index
    :param search_fields: field names to search (OR-grouped, factory 0.9)
    :param search_query: raw query string
    :return: pandas DataFrame with a single 'path' column (empty when the
        query matches nothing)
    """
    ix = index.open_dir(dirname)
    og = qparser.OrGroup.factory(0.9)
    mp = qparser.MultifieldParser(search_fields, ix.schema, group=og)
    q = mp.parse(search_query)
    with ix.searcher() as s:
        # NOTE(review): `return_results_count` is a module-level setting —
        # confirm it is defined at file scope.
        results = s.search(q, terms=True, limit=return_results_count)
        print("Completing Whoosh Search")
        # BUG FIX: the original built one single-row DataFrame per hit and
        # pd.concat'ed them, which raises ValueError on zero hits; building
        # the frame from a plain list handles the empty case and is O(n).
        paths = [hit['path'] for hit in results]
    return pd.DataFrame(paths, columns=['path'])
def searchDataByNameAndType(name, type):
    """Print files whose title matches *name** and whose extension matches.

    Builds the query "<name>* <type>" over the title/extension fields of the
    index in "indexdir", scored by raw term frequency.
    """
    ix = open_dir("indexdir")
    query_str = name + '* ' + type
    with ix.searcher(weighting=scoring.Frequency) as searcher:
        parsed = qparser.MultifieldParser(["title", "extension"],
                                          ix.schema).parse(query_str)
        hits = searcher.search(parsed, limit=None)
        if len(hits) > 0:
            for hit in hits:
                print("File Name: " + hit['title'],
                      "Path: " + hit['path'],
                      "Extension: " + hit['extension'])
        else:
            print("Aucun resultat trouvé !")
def search_clips(query, page):
    '''search_clips returns the clips found by the given query

    Clips are stored in a named tuple called ClipSearchResults.
    ClipSearchResults has two fields:
    - clips: a list of Clip objects on the page
    - length: the total number of clips that match the query
    '''
    search_fields = ['title', 'description', 'tags', 'user']
    parser = qparser.MultifieldParser(search_fields, clip_index.schema)
    parsed = parser.parse(query)
    with clip_index.searcher() as searcher:
        hits = searcher.search_page(parsed, page,
                                    pagelen=cn.SEARCH_CLIPS_PER_PAGE)
        # Resolve hit ids to Clip model rows while the searcher is open.
        matched = [Clip.query.get(hit['id']) for hit in hits]
        total = len(hits)
    ClipSearchResults = namedtuple('ClipSearchResults', ['clips', 'length'])
    return ClipSearchResults(matched, total)
def search_index(self, search_term):
    """Yield whoosh hits for *search_term* over this model's non-key columns.

    Generator: results are only valid while iteration is in progress (the
    searcher is closed when the generator is exhausted).
    """
    model_index = self._get_index()
    schema = self._get_schema()
    fields = list()
    for field in self._get_indexable_columns():
        # The "id" column is indexed under the name "model_id".
        if field == "id":
            field = "model_id"
        value = getattr(self, field)
        # Do not search the primary key
        # NOTE(review): `value.primary_key` implies getattr returns a column
        # descriptor rather than a plain value — confirm what
        # _get_indexable_columns yields; a plain value would raise here.
        if not value.primary_key:
            fields.append(field)
    parser = qparser.MultifieldParser(fields, schema)
    query = parser.parse(search_term)
    with model_index.searcher() as searcher:
        results = searcher.search(query)
        for result in results:
            yield result
def validate_question(text):
    """Check validity of the question: 1. filter by length 2. auto-correction 3. OOV

    Returns (ok, message): ok is False with an error message when the
    question is rejected; ok is True with a warning (or "") otherwise.
    """
    global valid, ix
    # empty or too long questions are not allowed
    if len(text) == 0:
        error = "<div class=\"alert alert-warning\"> Sorry, the question appears to be empty. Try again? </div>"
        return False, error
    elif len(text) > 150:
        error = "<div class=\"alert alert-warning\"> Sorry, the question is too long. Try to use only 150 characters." \
                " </div>"
        return False, error
    mparser = qparser.MultifieldParser(["answer"], schema=ix.schema)
    # auto-correction built in Whoosh
    with ix.searcher() as s:
        # Strip question marks before parsing so they are not treated as
        # query syntax.
        q = mparser.parse(text.replace("?", ""))
        corrected = s.correct_query(q, text)
        if corrected.query != q:
            # Offer the corrected spelling as a "did you mean" link.
            error = "<div class=\"alert alert-warning\"> Did you mean: <a href=\"" + url_for(
                'passage.process_question', received_question=corrected.string
            ) + "\">" + corrected.string + "</a>?</div>"
            return False, error
    # the question is valid, but contains lemmata which are not in vocabulary, so a warning is displayed
    oov_num = 0
    for word in text.replace("?", "").split():
        # Try the word as-is, then lowercased, before counting it as OOV.
        if not (lemmatizer.lemmatize(word, pos='v') in vocabulary_encoded.keys()):
            if not (lemmatizer.lemmatize(
                    word.lower(), pos='v')) in vocabulary_encoded.keys():
                oov_num += 1
    if oov_num != 0:
        if oov_num < len(text.split()):
            # Some words are known: accept with a warning.
            warning = "<div class=\"alert alert-warning\"> The question has words that are not in vocabulary." \
                      " if you rephrase it, you might get better results. </div>"
            return True, warning
        else:
            # Every word is unknown: reject.
            error = "<div class=\"alert alert-warning\"> Sorry, could not understand your input." \
                    " </div>"
            return False, error
    return True, ""
def test_boolean_multifield():
    """A non-matching text term across a TEXT+BOOLEAN multifield parse must
    still select the documents whose boolean field matches."""
    schema = fields.Schema(name=fields.TEXT(stored=True),
                           bit=fields.BOOLEAN(stored=True))
    ix = RamStorage().create_index(schema)
    cars = [(u('audi'), True), (u('vw'), False), (u('porsche'), False),
            (u('ferrari'), True), (u('citroen'), False)]
    with ix.writer() as w:
        for car_name, flag in cars:
            w.add_document(name=car_name, bit=flag)
    with ix.searcher() as s:
        qp = qparser.MultifieldParser(["name", "bit"], schema)
        r = s.search(qp.parse(u("boop")))
        assert sorted(hit["name"] for hit in r) == ["audi", "ferrari"]
        assert len(r) == 2
def index_search(dirname, search_fields, search_query):
    """Search the index at *dirname* and print stats plus the top-5 Q/A hits."""
    ix = index.open_dir(dirname)
    schema = ix.schema
    # Create query parser that looks through designated fields in index
    og = qparser.OrGroup.factory(0.9)
    mp = qparser.MultifieldParser(search_fields, schema, group = og)
    # This is the user query
    q = mp.parse(search_query)
    # Actual searcher, prints top 10 hits
    with ix.searcher() as s:
        results = s.search(q, limit = None)
        print("Total Documents: ",ix.doc_count_all())
        print("Retrieved Documents: ",results.estimated_length())
        # NOTE(review): _get_scorer() is a private whoosh API and may break
        # across versions — printed here for debugging only.
        print(results._get_scorer())
        # `i` is unused; enumerate kept to preserve the original behaviour.
        for i,result in enumerate(results[0:5]):
            print("Search Results: ",result.rank,"Score: ",result.score)
            print("Question: ",result['question'])
            print("Answer: ",result['answer'])
            print("------------------------")
def search(query, page=1, per_page=20):
    """Paged title/content search with HTML highlighting.

    (Python 2 code: uses `unicode`.)  Returns a SearchResultPage; an
    out-of-range page yields an empty page-1 result or None otherwise.
    """
    with index.searcher() as s:
        qp = qparser.MultifieldParser(['title', 'content'], index.schema)
        q = qp.parse(unicode(query))
        try:
            result_page = s.search_page(q, page, pagelen=per_page)
        except ValueError:
            # NOTE(review): presumably raised for an invalid page number —
            # page 1 is still rendered as an empty result set.
            if page == 1:
                return SearchResultPage(None, page)
            return None
        results = result_page.results
        # Tune snippet size and decorate matches for HTML display.
        results.highlighter.fragmenter.maxchars = 512
        results.highlighter.fragmenter.surround = 40
        results.highlighter.formatter = highlight.HtmlFormatter(
            'em', classname='search-match', termclass='search-term',
            between=u'<span class=ellipsis> … </span>')
        return SearchResultPage(result_page, page)
def post_search(pn=1, size=10):
    """Rebuild the 'posts' index from MongoDB, then run a paged search."""
    kw = request.values.get('kw')
    if kw is None:
        return render_template('search/list.html', title='搜索',
                               message='搜索关键字不能为空!')
    # Re-index every post document on each request (kept from the original
    # behaviour; expensive but ensures the index is fresh).
    whoosh_searcher.clear('posts')
    writer = whoosh_searcher.get_writer('posts')
    projection = ['_id', 'title', 'content', 'create_at', 'user_id',
                  'catalog_id']
    for doc in mongo.db['posts'].find({}, projection):
        doc['obj_id'] = str(doc['_id'])
        doc['user_id'] = str(doc['user_id'])
        doc['catalog_id'] = str(doc['catalog_id'])
        doc.pop('_id')
        writer.add_document(**doc)
    # Persist the rebuilt index.
    writer.commit()
    with whoosh_searcher.get_searcher('posts') as searcher:
        # Parse the query string against title/content.
        schema = whoosh_searcher.get_index('posts').schema
        q = qparser.MultifieldParser(['title', 'content'], schema).parse(kw)
        print('q:', q)
        # Run the paged search.
        found = searcher.search_page(q, pagenum=pn, pagelen=size,
                                     sortedby=sorting.ScoreFacet())
        rows = [hit.fields() for hit in found.results]
        # Build the page object for the template.
        page = Page(pn, size, result=rows, has_more=found.pagecount > pn,
                    page_count=found.pagecount, total=found.total)
        return render_template('search/list.html', title=kw + '搜索结果',
                               page=page, kw=kw)
def query_index(q, offset, limit):
    """Return (products, hit_count) for the 0-based page *offset*.

    Searches name/description in the index stored under 'index' and maps
    each hit to a product dict.
    """
    ix = index.open_dir('index')
    parsed = qparser.MultifieldParser(['name', 'description'],
                                      ix.schema).parse(q)
    products = []
    with ix.searcher() as searcher:
        page = searcher.search_page(parsed, pagenum=offset + 1,
                                    pagelen=limit)
        count = len(page)
        for hit in page:
            pprint(hit)
            products.append({
                'id': hit['ID'],
                'image': hit['image'],
                'name': hit['name'],
                'description': hit['description'],
                'price': hit['price']
            })
    return (products, count)
def test_missing_field_scoring(self):
    # Regression test (legacy whoosh API): field lengths must stay correct
    # when a later document omits one of the fields.
    schema = fields.Schema(name=fields.TEXT(stored=True),
                           hobbies=fields.TEXT(stored=True))
    storage = store.RamStorage()
    idx = index.Index(storage, schema, create=True)
    writer = idx.writer()
    writer.add_document(name=u'Frank', hobbies=u'baseball, basketball')
    writer.commit()
    self.assertEqual(idx.segments[0].field_length(0), 2) # hobbies
    self.assertEqual(idx.segments[0].field_length(1), 1) # name
    # Second document has no "hobbies" value at all.
    writer = idx.writer()
    writer.add_document(name=u'Jonny')
    writer.commit()
    # Both commits should have merged into one segment with unchanged
    # hobby length and incremented name length.
    self.assertEqual(len(idx.segments), 1)
    self.assertEqual(idx.segments[0].field_length(0), 2) # hobbies
    self.assertEqual(idx.segments[0].field_length(1), 2) # name
    # A multifield search must still find the single matching document.
    parser = qparser.MultifieldParser(['name', 'hobbies'], schema=schema)
    searcher = idx.searcher()
    result = searcher.search(parser.parse(u'baseball'))
    self.assertEqual(len(result), 1)
def question_tokens_to_query(keywords):
    """
    From a list of keywords and its synonyms, build a whoosh query.

    :param keywords: list of synonym groups; each group is a list of terms
        to be OR-ed together, groups are implicitly AND-ed by the parser
    :return: parsed whoosh query over the title/content fields
    """
    # Build query from keywords.
    # BUG FIX: the original appended " OR " after every term and sliced the
    # last 4 characters off, which produced a malformed ")" fragment for an
    # empty synonym group; " OR ".join() handles every group size correctly.
    query_str = "".join(
        "(" + " OR ".join(group) + ") " for group in keywords)
    # From query string build whoosh-defined query
    ix = index.open_dir(index_dir)
    parser = qparser.MultifieldParser(["title", "content"], ix.schema)
    parser.remove_plugin_class(qparser.PhrasePlugin)
    parser.add_plugin(qparser.SequencePlugin())  # For complex phrase query
    parser.add_plugin(qparser.FuzzyTermPlugin()
                      )  # Search for term that dont have to match exactly
    query = parser.parse(query_str)
    return query