Example #1
0
    def search(self, q, stemmed, syn):
        """Search the on-disk index for ``q`` in the title and content fields.

        The index is opened from the ``index`` directory that sits next to
        this module.  The query operators are remapped to ``&`` (and),
        ``|`` (or) and ``!`` (and-not).

        Args:
            q: Query string; words are separated by single spaces.
            stemmed: When truthy, the results of ``searchGermanStemmed`` and
                ``searchFrenchStemmed`` for the same query are merged into
                the returned results.
            syn: When truthy, every word of ``q`` is replaced by a
                parenthesised OR group made of the word itself followed by
                whatever ``getGermanSynonyms`` returns for it.

        Returns:
            The results object produced by the searcher (no hit limit),
            extended with the stemmed results when ``stemmed`` is set.
        """
        path = os.path.dirname(__file__)
        ix = open_dir(path + "/index")
        parser = MultifieldParser(["title", "content"], ix.schema)
        # NOTE(review): the searcher is never closed here; presumably the
        # returned results still depend on it -- confirm before changing.
        searcher = ix.searcher()
        # "\\|" is a backslash followed by a pipe: a regex for a literal "|".
        cp = qparser.OperatorsPlugin(And="&", Or="\\|", AndNot="!")
        parser.replace_plugin(cp)

        if syn:
            # Expand each word on its own and re-join the pieces, so that
            # every word keeps its synonyms and a short word can never be
            # substituted inside a longer one.
            expanded = []
            for word in q.split(" "):
                if not word:
                    # Consecutive spaces yield empty tokens; keep them as-is.
                    expanded.append(word)
                    continue
                alternatives = [word]
                alternatives.extend(self.getGermanSynonyms(word))
                expanded.append("(" + "\\|".join(alternatives) + ")")
            q = " ".join(expanded)

        searchQuery = parser.parse(q)
        allresults = searcher.search(searchQuery, limit=None)

        if stemmed:
            # Merge the hits of both stemmed searches into the main results.
            for stemmed_search in (self.searchGermanStemmed,
                                   self.searchFrenchStemmed):
                allresults.upgrade_and_extend(stemmed_search(q, ix))

        print(searchQuery)

        return allresults
Example #2
0
    def lookup(self, term, fuzzy=False, limit=None):
        """Look ``term`` up across all searchable fields of ``self.index``.

        The term is stripped and lower-cased before it is parsed.  When an
        exact (non-fuzzy) search yields nothing, the lookup is retried once
        as a fuzzy search limited to ``self.FUZZY_LIMIT`` hits.

        Args:
            term: Raw search text.
            fuzzy: When true, terms are parsed with ``FuzzyTerm``.
            limit: Maximum number of hits; any falsy value selects
                ``self.RESULTS_LIMIT``.

        Returns:
            The search results, or an empty list when an ``IndexError`` was
            raised before any result was obtained.
        """
        term = term.strip().lower()
        limit = limit or self.RESULTS_LIMIT

        searchable_fields = (
            'indice', 'indice_game',
            'name', 'name_jp',
            'game', 'version',
            'classification', 'element',
            'code', 'size',
            'damage_min', 'damage_max',
            'recovery', 'rarity',
        )

        # Only the fuzzy variant overrides the parser's term class.
        parser_options = {'schema': self.index.schema}
        if fuzzy:
            parser_options['termclass'] = FuzzyTerm
        parser = MultifieldParser(searchable_fields, **parser_options)

        # Swap the default operator plugin for one using symbolic operators.
        parser.replace_plugin(
            OperatorsPlugin(
                And="&", Or="\\|", AndNot="&!", AndMaybe="&~", Not="\\-"
            )
        )
        query = parser.parse(term)

        results = []
        try:
            results = self.index.searcher().search(query, limit=limit)
            if not (results or fuzzy):
                # Nothing found with an exact search: fall back to fuzzy.
                return self.lookup(term, fuzzy=True, limit=self.FUZZY_LIMIT)
        except IndexError:
            # Best effort: hand back whatever was collected so far.
            pass

        return results
        def searchFrenchStemmed(self, q, ix):
            """Search ``ix`` with the French-stemmed form of ``q``.

            ``q`` is split on whitespace, every word is stemmed with the
            French snowball stemmer, and the stems are searched in the
            title, titleStemmed, contentStemmed and content fields.  The
            query operators are remapped to ``&`` (and), ``|`` (or) and
            ``!`` (and-not).

            Args:
                q: Query text (converted to text before it is split).
                ix: The opened index to search.

            Returns:
                The results object produced by the searcher (no hit limit).
            """
            # ``unicode`` only exists on Python 2; fall back to ``str`` so
            # the method also runs on Python 3.
            try:
                to_text = unicode
            except NameError:
                to_text = str

            stemmer = snowballstemmer.stemmer('french')
            stems = stemmer.stemWords(to_text(q).split())
            # Each stem is followed by a single space, the last one included.
            stemmed_query = "".join(stem + " " for stem in stems)

            parser = MultifieldParser(
                ["title", "titleStemmed", "contentStemmed", "content"],
                ix.schema,
            )
            # "\\|" is a backslash followed by a pipe: a regex for a literal "|".
            cp = qparser.OperatorsPlugin(And="&", Or="\\|", AndNot="!")
            parser.replace_plugin(cp)

            searchQuery = parser.parse(stemmed_query)
            print(searchQuery)

            # NOTE(review): the searcher is never closed here; presumably the
            # returned results still depend on it -- confirm before changing.
            searcher = ix.searcher()
            return searcher.search(searchQuery, limit=None)
Example #4
0
def ranked_search(query):
    """Run ``query`` as AND, OR and AND-MAYBE searches and merge the hits.

    The index in ``indexdir`` is searched over the description,
    full_description, file_text, reporter and date fields.  The query is
    parsed three times: as given, after ``queryOR`` and after
    ``queryAndMaybe``.  Hits are collected in that order (AND first, then
    OR, then AND-MAYBE), each hit's ``report_id`` is resolved to a
    ``Report``, and the combined list is passed through
    ``removeDuplicates``.

    Args:
        query: The raw query string.

    Returns:
        A dict with two keys: ``'results'``, the de-duplicated list of
        ``(Report, score)`` tuples, and ``'hits'``, the value of
        ``get_stats`` for the OR results.
    """
    # No writer is opened: this function only reads the index.
    ix = open_dir("indexdir")
    # NOTE(review): the positional BM25F arguments are presumably B and K1
    # -- confirm against the Whoosh version in use.
    with ix.searcher(weighting=scoring.BM25F(0.75, 1.2)) as searcher:
        qp = MultifieldParser([
            "description", "full_description", "file_text", "reporter", "date"
        ], ix.schema)
        # "\\|" is a backslash followed by a pipe: a regex for a literal "|".
        cp = OperatorsPlugin(And="&", Or="\\|", AndMaybe="&~", Not=None)
        qp.replace_plugin(cp)

        and_query = qp.parse(query, normalize=True)
        or_query = qp.parse(queryOR(query), normalize=True)
        and_maybe_query = qp.parse(queryAndMaybe(query), normalize=True)

        # terms=True records which terms matched in each hit.
        and_docs = searcher.search(and_query, limit=None, terms=True)
        or_docs = searcher.search(or_query, limit=None, terms=True)
        and_maybe_docs = searcher.search(
            and_maybe_query, limit=None, terms=True)

        # Order matters: it decides the order of the merged result list.
        result_sets = (and_docs, or_docs, and_maybe_docs)

        for docs in result_sets:
            # Cap highlighted fragments at roughly tweet size ...
            docs.fragmenter.maxchars = 160
            # ... with 20 characters of context around each matched term.
            docs.fragmenter.surround = 20

        # Used for highlighting matched terms.  Scoring is still reflected
        # in the result order; sending only the OR docs keeps display simple.
        hits = get_stats(or_docs)

        results = []
        for docs in result_sets:
            for hit in docs:
                report = Report.objects.get(
                    pk=int(hit.fields()['report_id']))
                results.append((report, hit.score))

        results = removeDuplicates(results)

        return {
            'results': results,
            'hits': hits,
        }