def __init__(self, logger=None, restrict_to=None):
    """Set up the logger, the connection and the optional domain filter.

    Args:
        logger: Logger to use. When falsy, one is obtained from
            ``common.logger.getLogger`` under this class's name.
        restrict_to: Optional list of substrings used to filter results.
            Defaults to a fresh empty list per instance (no filtering).
    """
    if not logger:
        logger = common.logger.getLogger(self.__class__.__name__)
    self.logger = logger
    self.connection = MediaConnection(logger)
    # A ``None`` sentinel replaces the former ``[]`` default: a list literal
    # in the signature is created once and would be shared (and mutated) by
    # every instance built without an explicit ``restrict_to``.
    self.restrict_to = [] if restrict_to is None else restrict_to
    # NOTE(review): presumably a throttle between requests -- confirm the
    # meaning and units against MediaConnection.
    self.connection.delay = 1
def __init__(self, logger=None, restrict_to=None):
    """Set up the logger, the connection and the optional domain filter.

    Args:
        logger: Logger to use. When falsy, one is obtained from
            ``common.logger.getLogger`` under this class's name.
        restrict_to: Optional list of substrings used to filter results.
            Defaults to a fresh empty list per instance (no filtering).
    """
    if not logger:
        logger = common.logger.getLogger(self.__class__.__name__)
    self.logger = logger
    self.connection = MediaConnection(logger)
    # A ``None`` sentinel replaces the former ``[]`` default: a list literal
    # in the signature is created once and would be shared (and mutated) by
    # every instance built without an explicit ``restrict_to``.
    self.restrict_to = [] if restrict_to is None else restrict_to
    # NOTE(review): presumably a throttle between requests -- confirm the
    # meaning and units against MediaConnection.
    self.connection.delay = 1
class GoogleSearch(Search):
    """Run a Google query and scrape the result links out of the HTML page.

    Links are located by plain substring matching on the returned markup, so
    the parser depends on the exact page layout it was written against.
    """

    google_url = 'http://www.google.com/search'

    # Markers delimiting the results region of the page.
    _RESULTS_START = '<div id="res">'
    _RESULTS_END = '<div id="foot">'
    # Markers delimiting one redirect-wrapped result URL.
    _LINK_PREFIX = '<a href="/url?q='
    _LINK_SUFFIX = '&sa=U&ei='

    def __init__(self, logger=None, restrict_to=None):
        """Set up the logger, the connection and the optional domain filter.

        Args:
            logger: Logger to use. When falsy, one is obtained from
                ``common.logger.getLogger`` under this class's name.
            restrict_to: Optional list of substrings; when non-empty,
                ``search`` only returns links containing at least one of
                them. Defaults to a fresh empty list per instance.
        """
        if not logger:
            logger = common.logger.getLogger(self.__class__.__name__)
        self.logger = logger
        self.connection = MediaConnection(logger)
        # A ``None`` sentinel replaces the former ``[]`` default: a list
        # literal in the signature is created once and would be shared by
        # every instance built without an explicit ``restrict_to``.
        self.restrict_to = [] if restrict_to is None else restrict_to
        # NOTE(review): presumably a throttle between requests -- confirm
        # the meaning and units against MediaConnection.
        self.connection.delay = 1

    def search(self, search_term):
        """Return the result links for ``search_term``.

        Args:
            search_term: Query string sent as the ``q`` parameter.

        Returns:
            A list of unquoted links starting with ``http``, in page order.
            When ``restrict_to`` is non-empty the list is filtered to links
            containing one of its entries and de-duplicated. Returns
            ``None`` when the request yields no text or the results region
            of the page is empty.
        """
        # Perform the search and get the text of the page.
        params = {'q': search_term, 'btnG': 'Google Search'}
        page = self.connection.get(GoogleSearch.google_url, params)
        if not page:
            return None

        # Cut the page down to the results region. A missing start marker
        # gives -1, which makes the slice empty, so that case is reported
        # as "no results" as well.
        region_start = page.find(self._RESULTS_START)
        region_end = page.find(self._RESULTS_END)
        region = page[region_start:region_end]
        if region == '':
            # NOTE(review): ``warn`` is a deprecated alias of ``warning`` on
            # stdlib loggers; kept because the injected logger's type is
            # not visible here.
            self.logger.warn("No results for `{}`".format(search_term))
            return None

        links = self._extract_links(region)

        # If necessary, filter the links based on content.
        if self.restrict_to:
            links = self._filter_links(links)
        return links

    def _extract_links(self, region):
        """Collect every unquoted ``http`` link found in ``region``."""
        links = []
        cursor = 0
        # Scan by offset instead of re-slicing the remaining text on every
        # hit, which copied the rest of the page once per link.
        while True:
            prefix_at = region.find(self._LINK_PREFIX, cursor)
            if prefix_at == -1:
                break
            url_start = prefix_at + len(self._LINK_PREFIX)
            url_end = region.find(self._LINK_SUFFIX, url_start)
            if url_end == -1:
                break
            link = unquote(region[url_start:url_end])
            if link.startswith('http'):
                links.append(link)
            cursor = url_end
        return links

    def _filter_links(self, links):
        """Keep links matching ``restrict_to``, de-duplicated in page order.

        The previous ``list(set(...))`` removed duplicates but returned the
        links in arbitrary order, losing the ranking of the results.
        """
        seen = set()
        kept = []
        for link in links:
            if link in seen:
                continue
            if any(domain in link for domain in self.restrict_to):
                seen.add(link)
                kept.append(link)
        return kept
class GoogleSearch(Search):
    """Run a Google query and scrape the result links out of the HTML page.

    Links are located by plain substring matching on the returned markup, so
    the parser depends on the exact page layout it was written against.
    """

    google_url = 'http://www.google.com/search'

    # Markers delimiting the results region of the page.
    _RESULTS_START = '<div id="res">'
    _RESULTS_END = '<div id="foot">'
    # Markers delimiting one redirect-wrapped result URL.
    _LINK_PREFIX = '<a href="/url?q='
    _LINK_SUFFIX = '&sa=U&ei='

    def __init__(self, logger=None, restrict_to=None):
        """Set up the logger, the connection and the optional domain filter.

        Args:
            logger: Logger to use. When falsy, one is obtained from
                ``common.logger.getLogger`` under this class's name.
            restrict_to: Optional list of substrings; when non-empty,
                ``search`` only returns links containing at least one of
                them. Defaults to a fresh empty list per instance.
        """
        if not logger:
            logger = common.logger.getLogger(self.__class__.__name__)
        self.logger = logger
        self.connection = MediaConnection(logger)
        # A ``None`` sentinel replaces the former ``[]`` default: a list
        # literal in the signature is created once and would be shared by
        # every instance built without an explicit ``restrict_to``.
        self.restrict_to = [] if restrict_to is None else restrict_to
        # NOTE(review): presumably a throttle between requests -- confirm
        # the meaning and units against MediaConnection.
        self.connection.delay = 1

    def search(self, search_term):
        """Return the result links for ``search_term``.

        Args:
            search_term: Query string sent as the ``q`` parameter.

        Returns:
            A list of unquoted links starting with ``http``, in page order.
            When ``restrict_to`` is non-empty the list is filtered to links
            containing one of its entries and de-duplicated. Returns
            ``None`` when the request yields no text or the results region
            of the page is empty.
        """
        # Perform the search and get the text of the page.
        params = {'q': search_term, 'btnG': 'Google Search'}
        page = self.connection.get(GoogleSearch.google_url, params)
        if not page:
            return None

        # Cut the page down to the results region. A missing start marker
        # gives -1, which makes the slice empty, so that case is reported
        # as "no results" as well.
        region_start = page.find(self._RESULTS_START)
        region_end = page.find(self._RESULTS_END)
        region = page[region_start:region_end]
        if region == '':
            # NOTE(review): ``warn`` is a deprecated alias of ``warning`` on
            # stdlib loggers; kept because the injected logger's type is
            # not visible here.
            self.logger.warn("No results for `{}`".format(search_term))
            return None

        links = self._extract_links(region)

        # If necessary, filter the links based on content.
        if self.restrict_to:
            links = self._filter_links(links)
        return links

    def _extract_links(self, region):
        """Collect every unquoted ``http`` link found in ``region``."""
        links = []
        cursor = 0
        # Scan by offset instead of re-slicing the remaining text on every
        # hit, which copied the rest of the page once per link.
        while True:
            prefix_at = region.find(self._LINK_PREFIX, cursor)
            if prefix_at == -1:
                break
            url_start = prefix_at + len(self._LINK_PREFIX)
            url_end = region.find(self._LINK_SUFFIX, url_start)
            if url_end == -1:
                break
            link = unquote(region[url_start:url_end])
            if link.startswith('http'):
                links.append(link)
            cursor = url_end
        return links

    def _filter_links(self, links):
        """Keep links matching ``restrict_to``, de-duplicated in page order.

        The previous ``list(set(...))`` removed duplicates but returned the
        links in arbitrary order, losing the ranking of the results.
        """
        seen = set()
        kept = []
        for link in links:
            if link in seen:
                continue
            if any(domain in link for domain in self.restrict_to):
                seen.add(link)
                kept.append(link)
        return kept