Ejemplo n.º 1
0
class SearchEngine:
    """Query front end that parses boolean queries and ranks index hits.

    The engine owns a ``BooleanParser`` and keeps references to the index
    and the stopwords it was constructed with.
    """

    def __init__(self, index, stopwords):
        """Create the parser and remember *index* and *stopwords*."""
        self.parser = BooleanParser()
        self.index, self.stopwords = index, stopwords

    def search(self, query, lang, stem=True):
        """Look up *query* in the index and return the scored documents.

        Args:
            query: Raw query string as typed by the user.
            lang: Language identifier forwarded to the parser and scorer.
            stem: Value assigned to the parser's ``stem`` flag for the
                query that is actually sent to the index.

        Returns:
            A dict with the keys ``documents`` (results ordered by their
            ``'score'`` entry, highest first), ``terms``, ``pureQuery``
            (the untouched input), ``parsedQuery`` and ``wordsTerms``.
        """
        # NOTE(review): remove_nonletters presumably swaps non-letter
        # characters for ' ' while keeping parentheses - confirm in helper.
        cleaned = remove_nonletters(query, ' ', ['(', ')'])
        parsed, terms, words_terms = self._parse_query(cleaned, stem, lang)

        hits = self.index.get_documents(parsed)
        ordered = list(score(terms, hits, self.index, lang))
        ordered.sort(key=lambda hit: hit['score'], reverse=True)

        return {
            'documents': ordered,
            'terms': terms,
            'pureQuery': query,
            'parsedQuery': parsed,
            'wordsTerms': words_terms,
        }

    def nostemSearch(self, query, lang):
        """Shortcut for :meth:`search` with ``stem`` set to ``False``."""
        return self.search(query, lang, stem=False)

    def _parse_query(self, query, stem, lang):
        """Parse *query* twice: once with stemming off, once with *stem*.

        Returns:
            A ``(parsed_query, terms, words_terms)`` tuple where
            ``words_terms`` comes from the pass with ``stem = False`` and
            the other two come from the pass with ``stem`` as requested.
            The parser's ``stem`` attribute is left set to *stem*.
        """
        parser = self.parser

        # First pass: parser.stem is forced to False.
        parser.stem = False
        words_terms = parser.terms(
            parser.parse(query, lang, self.stopwords))

        # Second pass: the caller's stemming choice.
        parser.stem = stem
        parsed = parser.parse(query, lang, self.stopwords)
        return parsed, parser.terms(parsed), words_terms
Ejemplo n.º 2
0
class SearchEngine:
	"""Search facade: parses a boolean query, fetches and ranks documents.

	Holds a ``BooleanParser`` plus the index and stopwords given at
	construction time.
	"""

	def __init__(self, index, stopwords):
		"""Build the parser and store *index* and *stopwords*."""
		self.parser = BooleanParser()
		self.index, self.stopwords = index, stopwords

	def search(self, query, lang, stem=True):
		"""Evaluate *query* and return the results sorted by score.

		Args:
			query: The query string exactly as received.
			lang: Language identifier passed on to the parser and scorer.
			stem: Value given to the parser's ``stem`` flag for the query
				that is used against the index.

		Returns:
			A dict with the keys ``documents`` (sorted by each item's
			``'score'`` entry in descending order), ``terms``,
			``pureQuery`` (the original input), ``parsedQuery`` and
			``wordsTerms``.
		"""
		# NOTE(review): remove_nonletters presumably replaces non-letters
		# with ' ' and keeps the parentheses - verify against the helper.
		cleaned = remove_nonletters(query, ' ', ['(', ')'])
		parsed, terms, words_terms = self._parse_query(cleaned, stem, lang)

		found = self.index.get_documents(parsed)
		by_score = list(score(terms, found, self.index, lang))
		by_score.sort(key=lambda item: item['score'], reverse=True)

		return {
			'documents': by_score,
			'terms': terms,
			'pureQuery': query,
			'parsedQuery': parsed,
			'wordsTerms': words_terms,
		}

	def nostemSearch(self, query, lang):
		"""Call :meth:`search` with ``stem`` set to ``False``."""
		return self.search(query, lang, stem=False)

	def _parse_query(self, query, stem, lang):
		"""Run the parser with stemming off, then with *stem*.

		Returns:
			``(parsed_query, terms, words_terms)``; ``words_terms`` is
			taken from the pass where ``parser.stem`` is ``False``, the
			first two values from the pass where it equals *stem*. The
			parser's ``stem`` attribute stays at *stem* afterwards.
		"""
		parser = self.parser

		# Pass 1: parser.stem forced to False.
		parser.stem = False
		words_terms = parser.terms(parser.parse(query, lang, self.stopwords))

		# Pass 2: stemming as requested by the caller.
		parser.stem = stem
		parsed = parser.parse(query, lang, self.stopwords)
		return parsed, parser.terms(parsed), words_terms
Ejemplo n.º 3
0
	def test_get_documents(self):
		"""Check how many documents the index returns for boolean queries.

		Each query is parsed with a fresh ``BooleanParser`` and handed to
		``self.index.get_documents``; only the length of the result is
		compared. The expected counts depend on the fixture index
		(presumably built in ``setUp``, which is not visible here).
		"""
		parse = BooleanParser().parse

		def hit_count(query):
			return len(self.index.get_documents(parse(query)))

		expectations = (
			('rovnice průměr', 1),
			('průměr NOT úhlopříčky', 1),
			('rovnice', 8),
			('rovnice NOT spojitost', 5),
			('(statistika OR pythagorova)', 3),
		)
		for query, expected in expectations:
			self.assertEqual(hit_count(query), expected)
Ejemplo n.º 4
0
class BooleanParserTest(unittest.TestCase):
    """Tests for ``BooleanParser`` that compare the ``repr`` of its output."""

    # A single parser instance shared by every test method.
    parser = BooleanParser()

    def test_pure_parse(self):
        """``_pure_parse`` results are checked via their ``repr``."""

        def pure_repr(text):
            return repr(self.parser._pure_parse(text))

        cases = (
            ('arg1 AND arg2 OR arg3', "(('arg1' AND 'arg2') OR 'arg3')"),
            ('(ARG AND ARG (ARG OR NOT ARG))',
             "('ARG' AND 'ARG' AND ('ARG' OR (NOT('ARG'))))"),
            ('', '()'),
            ('OR OR (AND test NOT)', "(() OR () OR ('test' AND ''))"),
        )
        for text, expected in cases:
            self.assertEqual(pure_repr(text), expected)

    def test_parse(self):
        """``parse`` results are checked via their ``repr``."""

        def parsed_repr(text):
            return repr(self.parser.parse(text))

        cases = (
            ('OR OR (AND test NOT)', "'test'"),
            ('((star AND wars) AND NOT trek) OR ((star AND trek) OR TOS)',
             "((('star' AND 'wars') AND NOT('trek')) OR (('star' AND 'trek') OR 'tos'))"),
            ('', "''"),
        )
        for text, expected in cases:
            self.assertEqual(parsed_repr(text), expected)
Ejemplo n.º 5
0
 def __init__(self, index, stopwords):
     """Create a ``BooleanParser`` and store *index* and *stopwords*."""
     self.parser = BooleanParser()
     self.index, self.stopwords = index, stopwords
Ejemplo n.º 6
0
	def __init__(self, index, stopwords):
		"""Set up a ``BooleanParser`` and keep *index* and *stopwords*."""
		self.parser = BooleanParser()
		self.index, self.stopwords = index, stopwords