コード例 #1
0
ファイル: search_engine.py プロジェクト: KGerring/fca_search
class SearchEngine:
    """Run boolean queries against an index and return scored documents.

    The engine owns a ``BooleanParser`` and keeps the index it queries plus
    the stopwords that are forwarded to the parser on every parse.
    """

    def __init__(self, index, stopwords):
        """Create a fresh parser and remember *index* and *stopwords*."""
        self.parser = BooleanParser()
        self.index, self.stopwords = index, stopwords

    def search(self, query, lang, stem=True):
        """Search the index for *query* and return the results as a dict.

        The returned dict has the keys ``documents`` (scored documents in
        descending order of their ``'score'`` entry), ``terms``,
        ``pureQuery`` (the query exactly as passed in), ``parsedQuery`` and
        ``wordsTerms`` (terms obtained with stemming switched off).
        """
        # NOTE(review): presumably replaces non-letter characters with a
        # space while keeping parentheses -- confirm in remove_nonletters.
        cleaned = remove_nonletters(query, ' ', ['(', ')'])
        parsed, terms, words_terms = self._parse_query(cleaned, stem, lang)
        matches = self.index.get_documents(parsed)

        def by_score(doc):
            return doc['score']

        ranked = list(score(terms, matches, self.index, lang))
        ranked.sort(key=by_score, reverse=True)

        result = {'documents': ranked, 'terms': terms}
        result['pureQuery'] = query
        result['parsedQuery'] = parsed
        result['wordsTerms'] = words_terms
        return result

    def nostemSearch(self, query, lang):
        """Shortcut for :meth:`search` with stemming disabled."""
        return self.search(query, lang, False)

    def _parse_query(self, query, stem, lang):
        """Parse *query* twice and return ``(parsed, terms, words_terms)``.

        The first pass always runs with ``parser.stem`` set to ``False`` and
        yields ``words_terms``; the second pass uses the requested *stem*
        flag and yields the parsed query and its terms. The parser's
        ``stem`` attribute is left at *stem* afterwards.
        """
        parser = self.parser

        # Pass 1: terms without stemming.
        parser.stem = False
        words_terms = parser.terms(parser.parse(query, lang, self.stopwords))

        # Pass 2: the query as it will actually be evaluated.
        parser.stem = stem
        parsed = parser.parse(query, lang, self.stopwords)
        return parsed, parser.terms(parsed), words_terms
コード例 #2
0
ファイル: search_engine.py プロジェクト: havrlant/fca-search
class SearchEngine:
	"""Evaluate boolean queries on an index and rank the matching documents.

	Holds a ``BooleanParser`` together with the index to query and the
	stopwords handed to the parser for each parse.
	"""

	def __init__(self, index, stopwords):
		"""Keep *index* and *stopwords* and instantiate the query parser."""
		self.parser = BooleanParser()
		self.index, self.stopwords = index, stopwords

	def search(self, query, lang, stem = True):
		"""Return a dict describing the results of searching for *query*.

		Keys: ``documents`` (scored documents, highest ``'score'`` first),
		``terms``, ``pureQuery`` (the unmodified input), ``parsedQuery``
		and ``wordsTerms`` (terms produced with stemming turned off).
		"""
		# NOTE(review): looks like this blanks out non-letters but keeps
		# the parentheses -- confirm in remove_nonletters.
		normalized = remove_nonletters(query, ' ', ['(', ')'])
		parsed, terms, words_terms = self._parse_query(normalized, stem, lang)
		found = self.index.get_documents(parsed)
		scored = score(terms, found, self.index, lang)
		return {
			'documents': sorted(scored, key=lambda doc: doc['score'], reverse=True),
			'terms': terms,
			'pureQuery': query,
			'parsedQuery': parsed,
			'wordsTerms': words_terms,
		}

	def nostemSearch(self, query, lang):
		"""Call :meth:`search` with stemming switched off."""
		return self.search(query, lang, False)

	def _parse_query(self, query, stem, lang):
		"""Return ``(parsed_query, terms, words_terms)`` for *query*.

		``words_terms`` comes from a parse with ``parser.stem`` forced to
		``False``; ``parsed_query`` and ``terms`` come from a second parse
		with ``parser.stem`` set to *stem*, which is the value the parser
		keeps once this method returns.
		"""
		parser, stopwords = self.parser, self.stopwords

		parser.stem = False
		unstemmed = parser.parse(query, lang, stopwords)
		words_terms = parser.terms(unstemmed)

		parser.stem = stem
		parsed_query = parser.parse(query, lang, stopwords)
		terms = parser.terms(parsed_query)

		return parsed_query, terms, words_terms
コード例 #3
0
	def test_get_documents(self):
		"""Check how many documents the index returns for sample queries."""
		parse = BooleanParser().parse

		def hit_count(query):
			# Number of documents the index yields for the parsed query.
			return len(self.index.get_documents(parse(query)))

		# (query, expected number of matching documents), checked in order.
		expectations = (
			('rovnice průměr', 1),
			('průměr NOT úhlopříčky', 1),
			('rovnice', 8),
			('rovnice NOT spojitost', 5),
			('(statistika OR pythagorova)', 3),
		)
		for query, expected in expectations:
			self.assertEqual(hit_count(query), expected)
コード例 #4
0
class BooleanParserTest(unittest.TestCase):
    """Tests for ``BooleanParser``, comparing ``repr`` of the parse results."""

    # A single parser instance, created at class definition and shared by
    # every test method.
    parser = BooleanParser()

    def test_pure_parse(self):
        """``_pure_parse`` output must match the expected repr strings."""

        def rendered(text):
            return repr(self.parser._pure_parse(text))

        # (input query, expected repr), checked in order.
        cases = [
            ('arg1 AND arg2 OR arg3', "(('arg1' AND 'arg2') OR 'arg3')"),
            ('(ARG AND ARG (ARG OR NOT ARG))',
             "('ARG' AND 'ARG' AND ('ARG' OR (NOT('ARG'))))"),
            ('', '()'),
            ('OR OR (AND test NOT)', "(() OR () OR ('test' AND ''))"),
        ]
        for text, expected in cases:
            self.assertEqual(rendered(text), expected)

    def test_parse(self):
        """``parse`` output must match the expected repr strings."""

        def rendered(text):
            return repr(self.parser.parse(text))

        # (input query, expected repr), checked in order.
        cases = [
            ('OR OR (AND test NOT)', "'test'"),
            ('((star AND wars) AND NOT trek) OR ((star AND trek) OR TOS)',
             "((('star' AND 'wars') AND NOT('trek')) OR (('star' AND 'trek') OR 'tos'))"),
            ('', "''"),
        ]
        for text, expected in cases:
            self.assertEqual(rendered(text), expected)
コード例 #5
0
ファイル: search_engine.py プロジェクト: KGerring/fca_search
 def __init__(self, index, stopwords):
     """Create a new ``BooleanParser`` and store *index* and *stopwords*."""
     self.parser = BooleanParser()
     self.index, self.stopwords = index, stopwords
コード例 #6
0
ファイル: search_engine.py プロジェクト: havrlant/fca-search
	def __init__(self, index, stopwords):
		"""Set up the parser and keep references to *index* and *stopwords*."""
		self.parser = BooleanParser()
		self.index, self.stopwords = index, stopwords