def tempSearchQuery(index, query, settings, lang):
    """Run an FCA search for ``query`` over an already-built ``index``.

    When ``lang`` is falsy, the language is taken from
    ``settings.get('lang')`` instead. The value produced by
    ``FCASearchEngine.search`` is returned unchanged.
    """
    language = lang or settings.get('lang')
    engine = SearchEngine(index, getStopWords(language))
    return FCASearchEngine(engine, index, settings).search(query, language)
def search(self, query):
    """Return the normalised word tuples of the 'specialization' results.

    Opens the database named by ``self.databaseName``, runs an FCA search
    for ``query`` and maps ``self.normalizeQuery`` over the ``'words'`` of
    every specialization entry. The resulting list is then sliced with
    ``[:self.maxSpec]``.
    """
    db_path = DATABASES_FOLDER + self.databaseName + '/'
    db_settings = Settings(db_path + SETTINGS_FILE)
    db_index = Index(db_path, db_settings)
    engine = SearchEngine(db_index, getStopWords())
    fca_engine = FCASearchEngine(engine, db_index, db_settings)
    found = fca_engine.search(query, True)  # nostemsearch

    # Normalise every entry first and slice afterwards, so normalizeQuery
    # sees all specializations, not only the ones that are kept.
    normalised = []
    for entry in found['specialization']:
        normalised.append(tuple(map(self.normalizeQuery, entry['words'])))
    return normalised[:self.maxSpec]
def searchQuery(databaseName, query, lang, stopwatch=None):
    """Run an FCA search for ``query`` in the database ``databaseName``.

    The index and settings come from ``getIndexAndSettings``. When ``lang``
    is falsy, ``settings.get('lang')`` is used instead. ``stopwatch`` is
    accepted but not used by this function. The value produced by
    ``FCASearchEngine.search`` is returned unchanged.
    """
    index, settings = getIndexAndSettings(databaseName)
    language = lang or settings.get('lang')
    engine = SearchEngine(index, getStopWords(language))
    return FCASearchEngine(engine, index, settings).search(query, language)
def tempSearch(path):
    """Build a temporary index from the file at ``path`` and search it.

    The file content is parsed with ``loads``. The parsed data's optional
    ``'options'`` mapping supplies ``'lang'`` (default ``'en'``) and
    ``'query'`` (default ``'lion'``). The search result is passed through
    ``getJson`` before being returned.
    """
    data = loads(readfile(path))
    options = data.get('options', {})
    lang = options.get('lang', 'en')

    index = TempSearch().build(data, getStopWords(lang))

    query = options.get('query', 'lion')
    # An empty dict stands in for the settings object here.
    return getJson(tempSearchQuery(index, query, {}, lang))
def buildIndex(databaseName, linksSourcePath, currSettings, lang):
    """Build the index for ``databaseName`` from a file of links.

    Settings are loaded from ``DATA_FOLDER + SETTINGS_FILE`` and every
    key/value pair of ``currSettings`` is then applied with
    ``settings.set``. The file at ``linksSourcePath`` is split into lines,
    which are handed to ``IndexManager.build`` together with the database
    folder, the stop words for ``lang`` and ``lang`` itself.
    """
    settings = Settings(DATA_FOLDER + SETTINGS_FILE)
    for name, override in currSettings.items():
        settings.set(name, override)

    target_folder = DATABASES_FOLDER + databaseName + '/'
    link_lines = readfile(linksSourcePath).splitlines()

    manager = IndexManager(settings)
    manager.shutUp = False
    manager.build(link_lines, target_folder, getStopWords(lang), lang)
def search(self, query):
    """Return the normalised word tuples of the 'specialization' results.

    Opens the database named by ``self.databaseName``, runs an FCA search
    for ``query`` and maps ``self.normalizeQuery`` over the ``'words'`` of
    every specialization entry. The resulting list is then sliced with
    ``[:self.maxSpec]``.
    """
    folder = DATABASES_FOLDER + self.databaseName + '/'
    settings = Settings(folder + SETTINGS_FILE)
    index = Index(folder, settings)
    fca = FCASearchEngine(
        SearchEngine(index, getStopWords()),
        index,
        settings,
    )
    specializations = fca.search(query, True)['specialization']  # nostemsearch

    # All entries are normalised before the slice is taken.
    word_tuples = [
        tuple(map(self.normalizeQuery, item['words']))
        for item in specializations
    ]
    return word_tuples[:self.maxSpec]
def test_toIndex(self):
    """Compare ``repr(toIndex(...))`` with the stored index fixtures."""
    pages = downloads(self.getURLs())
    sites = [{'type': 'html', 'content': page, 'url': 'url'} for page in pages]

    # Each case: (stop-word supplier, third toIndex argument, fixture file).
    # Suppliers are called inside the loop so that getStopWords() runs
    # immediately before the toIndex call that uses it.
    # To regenerate a fixture:
    #   savefile(repr(toIndex(sites, <stop words>, <n>)), TEST_FOLDER + <fixture>)
    cases = (
        (list, 1, 'index1.txt'),
        (getStopWords, 1, 'index2.txt'),
        (getStopWords, 2, 'index3.txt'),
    )
    for stop_words_for, n, fixture in cases:
        result = toIndex(sites, stop_words_for(), n)
        desired = readfile(TEST_FOLDER + fixture)
        self.assertEqual(repr(result), desired)