Ejemplo n.º 1
0
 def __init__(self, logger=None, restrict_to=None):
     """Set up the logger, the connection and the optional filter list.

     Args:
         logger: Logger to use. When falsy, a logger named after the
             concrete class is obtained from ``common.logger.getLogger``.
         restrict_to: Optional list stored as ``self.restrict_to``.
             ``None`` (the default) yields a fresh empty list.
     """
     if not logger:
         logger = common.logger.getLogger(self.__class__.__name__)
     self.logger = logger
     self.connection = MediaConnection(logger)
     # Build a fresh list per instance: a ``[]`` default in the signature
     # is created once and would be shared (and mutated) across instances.
     self.restrict_to = [] if restrict_to is None else restrict_to
     # NOTE(review): presumably a delay between requests, in seconds --
     # confirm against MediaConnection.
     self.connection.delay = 1
Ejemplo n.º 2
0
 def __init__(self, logger=None, restrict_to=None):
   """Set up the logger, the connection and the optional filter list.

   Args:
     logger: Logger to use. When falsy, a logger named after the
       concrete class is obtained from ``common.logger.getLogger``.
     restrict_to: Optional list stored as ``self.restrict_to``.
       ``None`` (the default) yields a fresh empty list.
   """
   if not logger:
     logger = common.logger.getLogger(self.__class__.__name__)
   self.logger = logger
   self.connection = MediaConnection(logger)
   # Build a fresh list per instance: a ``[]`` default in the signature
   # is created once and would be shared (and mutated) across instances.
   self.restrict_to = [] if restrict_to is None else restrict_to
   # NOTE(review): presumably a delay between requests, in seconds --
   # confirm against MediaConnection.
   self.connection.delay = 1
Ejemplo n.º 3
0
class GoogleSearch(Search):
  """Query Google and scrape the result links out of the returned HTML.

  Link extraction relies on literal markers in the page
  (``<div id="res">``, ``<div id="foot">`` and ``<a href="/url?q=``), so
  it only works while the result page uses that markup.
  """

  google_url = 'http://www.google.com/search'

  def __init__(self, logger=None, restrict_to=None):
    """Set up the logger, the connection and the optional domain filter.

    Args:
      logger: Logger to use. When falsy, a logger named after the
        concrete class is obtained from ``common.logger.getLogger``.
      restrict_to: Optional list of substrings; when non-empty,
        ``search`` only returns links containing at least one of them.
        ``None`` (the default) yields a fresh empty list.
    """
    if not logger:
      logger = common.logger.getLogger(self.__class__.__name__)
    self.logger = logger
    self.connection = MediaConnection(logger)
    # Build a fresh list per instance: a ``[]`` default in the signature
    # is created once and would be shared (and mutated) across instances.
    self.restrict_to = [] if restrict_to is None else restrict_to
    # NOTE(review): presumably a delay between requests, in seconds --
    # confirm against MediaConnection.
    self.connection.delay = 1

  def search(self, search_term):
    """Return the result links found for ``search_term``.

    Args:
      search_term: Value sent as the ``q`` query parameter.

    Returns:
      ``None`` when the connection returns nothing or the page has no
      results section; otherwise a list of unquoted links starting with
      ``http``, in page order. When ``self.restrict_to`` is non-empty
      the list is limited to links containing one of its entries, with
      duplicates removed.
    """
    # Perform the search and get the text of the page.
    params = {'q': search_term, 'btnG': 'Google Search'}
    text = self.connection.get(GoogleSearch.google_url, params)
    if not text:
      return None

    # Isolate the results section of the page.
    start = text.find('<div id="res">')
    end = text.find('<div id="foot">')
    if end == -1:
      # No footer marker: keep everything after the results marker
      # rather than slicing with -1 and dropping the last character.
      end = len(text)
    results = text[start:end] if start > -1 else ''
    if not results:
      self.logger.warning("No results for `{}`".format(search_term))
      return None

    # Pull out the links of results by scanning with an offset, which
    # avoids re-copying the remaining text after every hit.
    prefix = '<a href="/url?q='
    suffix = '&amp;sa=U&amp;ei='
    links = []
    pos = 0
    while True:
      begin = results.find(prefix, pos)
      if begin == -1:
        break
      begin += len(prefix)
      stop = results.find(suffix, begin)
      if stop == -1:
        break
      link = unquote(results[begin:stop])
      if link.startswith('http'):
        links.append(link)
      pos = stop

    # If necessary, filter the links based on content. Duplicates are
    # dropped while keeping the order in which the links appeared.
    if self.restrict_to:
      seen = set()
      filtered_links = []
      for link in links:
        if link in seen:
          continue
        if any(domain in link for domain in self.restrict_to):
          seen.add(link)
          filtered_links.append(link)
      links = filtered_links
    return links
Ejemplo n.º 4
0
class GoogleSearch(Search):
    """Query Google and scrape the result links out of the returned HTML.

    Link extraction relies on literal markers in the page
    (``<div id="res">``, ``<div id="foot">`` and ``<a href="/url?q=``),
    so it only works while the result page uses that markup.
    """

    google_url = 'http://www.google.com/search'

    def __init__(self, logger=None, restrict_to=None):
        """Set up the logger, the connection and the optional domain filter.

        Args:
            logger: Logger to use. When falsy, a logger named after the
                concrete class is obtained from
                ``common.logger.getLogger``.
            restrict_to: Optional list of substrings; when non-empty,
                ``search`` only returns links containing at least one of
                them. ``None`` (the default) yields a fresh empty list.
        """
        if not logger:
            logger = common.logger.getLogger(self.__class__.__name__)
        self.logger = logger
        self.connection = MediaConnection(logger)
        # Build a fresh list per instance: a ``[]`` default in the
        # signature is created once and would be shared (and mutated)
        # across instances.
        self.restrict_to = [] if restrict_to is None else restrict_to
        # NOTE(review): presumably a delay between requests, in seconds
        # -- confirm against MediaConnection.
        self.connection.delay = 1

    def search(self, search_term):
        """Return the result links found for ``search_term``.

        Args:
            search_term: Value sent as the ``q`` query parameter.

        Returns:
            ``None`` when the connection returns nothing or the page has
            no results section; otherwise a list of unquoted links
            starting with ``http``, in page order. When
            ``self.restrict_to`` is non-empty the list is limited to
            links containing one of its entries, with duplicates removed.
        """
        # Perform the search and get the text of the page.
        params = {'q': search_term, 'btnG': 'Google Search'}
        text = self.connection.get(GoogleSearch.google_url, params)
        if not text:
            return None

        # Isolate the results section of the page.
        start = text.find('<div id="res">')
        end = text.find('<div id="foot">')
        if end == -1:
            # No footer marker: keep everything after the results marker
            # rather than slicing with -1 and dropping the last character.
            end = len(text)
        results = text[start:end] if start > -1 else ''
        if not results:
            self.logger.warning("No results for `{}`".format(search_term))
            return None

        # Pull out the links of results by scanning with an offset, which
        # avoids re-copying the remaining text after every hit.
        prefix = '<a href="/url?q='
        suffix = '&amp;sa=U&amp;ei='
        links = []
        pos = 0
        while True:
            begin = results.find(prefix, pos)
            if begin == -1:
                break
            begin += len(prefix)
            stop = results.find(suffix, begin)
            if stop == -1:
                break
            link = unquote(results[begin:stop])
            if link.startswith('http'):
                links.append(link)
            pos = stop

        # If necessary, filter the links based on content. Duplicates are
        # dropped while keeping the order in which the links appeared.
        if self.restrict_to:
            seen = set()
            filtered_links = []
            for link in links:
                if link in seen:
                    continue
                if any(domain in link for domain in self.restrict_to):
                    seen.add(link)
                    filtered_links.append(link)
            links = filtered_links
        return links