def count(self, query=None, domain=None):
    """Return the total number of results reported for a query.

    Builds a URL from ``GoogleSearch.SEARCH_URL``, fetches it through a
    ``Mario`` instance configured with ``self.proxies``, and extracts the
    total from the ``<p id=resultStats>`` element of the response body.

    Args:
        query: Search terms. Falls back to ``self.query`` when empty or None.
        domain: Value substituted into the URL template. Falls back to
            ``self.domain`` when empty or None.

    Returns:
        The reported total as an integer, or 0 when neither known page
        layout matches the response body.

    Raises:
        GoogleException: With code 502 when the fetch returns a falsy
            response.
        ValueError: If the captured text is not an integer once commas
            are removed.
    """
    if not domain:
        domain = self.domain
    if not query:
        query = self.query

    # Only spaces are escaped here; any other character is passed through
    # to the URL template unchanged.
    query = query.replace(' ', '%20')
    url = GoogleSearch.SEARCH_URL % {'domain': domain, 'query': query}

    mario = Mario()
    mario.set_proxies_list(self.proxies)
    response = mario.get(url)
    if not response:
        raise GoogleException('Fail to open page', 502)

    # Raw strings keep ``\d`` a regex escape instead of an (invalid) string
    # escape. ``[^^]*?`` lazily matches any run of characters other than '^',
    # newlines included.
    patterns = (
        # Total is the bold element that follows a "<b>N</b> - <b>M</b>" range.
        re.compile(r'<p id=resultStats> [^^]*?<b>\d+</b> - <b>\d+</b>'
                   r'[^^]*?<b>([^^]*?)</b>'),
        # Total is the second bold element, with a "<b>N</b>-<b>M</b>" range
        # appearing later in the text.
        re.compile(r'<p id=resultStats> [^^]*?<b>[^^]*?</b>[^^]*?'
                   r'<b>([^^]*?)</b>[^^]*?<b>\d+</b>-<b>\d+</b>'),
    )
    for pattern in patterns:
        match = pattern.search(response.body)
        if match is None:
            continue
        # Drop thousands separators before converting. ``int`` promotes to
        # an arbitrary-precision integer on its own, so ``long`` is not needed.
        return int(match.group(1).replace(',', ''))
    return 0
def _get_page(self, query, page, domain):
    """Fetch one page of search results and wrap each hit in a GoogleResult.

    The URL template is chosen from ``GoogleSearch`` according to whether
    this is the first page (``page == 0``) and whether
    ``self.number_of_results`` equals 10; the ``*_WITH_NUMBER`` templates
    additionally receive ``num``, and the ``NEXT_PAGE*`` templates receive
    ``start``.

    Args:
        query: Value substituted for ``query`` in the URL template.
        page: Zero-based page index.
        domain: Value substituted for ``domain`` in the URL template.

    Returns:
        A list of ``GoogleResult`` objects whose last constructor argument
        is the 1-based position of the hit across pages, or an empty list
        when ``self._parse_response`` yields nothing.

    Raises:
        GoogleException: With code 502 when the fetch returns a falsy
            response.
    """
    per_page = self.number_of_results
    default_size = per_page == 10
    first_page = page == 0

    # Select the template first, then fill in only the keys it needs.
    if first_page and default_size:
        template = GoogleSearch.SEARCH_URL
    elif first_page:
        template = GoogleSearch.SEARCH_URL_WITH_NUMBER
    elif default_size:
        template = GoogleSearch.NEXT_PAGE
    else:
        template = GoogleSearch.NEXT_PAGE_WITH_NUMBER

    params = {'domain': domain, 'query': query}
    if not default_size:
        params['num'] = per_page
    if not first_page:
        params['start'] = page * per_page
    url = template % params

    fetcher = Mario()
    fetcher.set_proxies_list(self.proxies)
    response = fetcher.get(url)
    if not response:
        raise GoogleException('Fail to open page', 502)

    parsed = self._parse_response(response.body)
    if not parsed:
        return []

    hits = []
    for index, entry in enumerate(parsed):
        # Position continues counting from the hits on earlier pages.
        position = page * per_page + index + 1
        hits.append(GoogleResult(entry['unescape_url'],
                                 entry['title'],
                                 entry['description'],
                                 position))
    return hits