Example #1
 def __init__(self, to_proc_queue, res_queue, js=True):
     """
         the fetcher obj to use, the url, a generic 'marker' to be passed to
         both callbacks to keep a reference, a func to call on any new urls
         found, and a func to call on the result page.
     """
     Thread.__init__(self)
     self.fetcher = getFetcher(js=js)  # project helper; js toggles a JavaScript-capable fetcher
     self.to_proc_queue = to_proc_queue
     self.res_queue = res_queue
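For context, here is a minimal, self-contained sketch of how a worker built this way could be wired up with two queues. The run() loop, the None sentinel, and the _StubFetcher/getFetcher stand-ins are assumptions for illustration only; they are not taken from the original project.

 from queue import Queue
 from threading import Thread

 # Stand-in for the project's fetcher factory; assumed interface: .get(url) -> str.
 class _StubFetcher:
     def get(self, url):
         return "<html>stub page for {}</html>".format(url)

 def getFetcher(js=True):
     return _StubFetcher()

 class FetchWorker(Thread):
     def __init__(self, to_proc_queue, res_queue, js=True):
         Thread.__init__(self)
         self.fetcher = getFetcher(js=js)
         self.to_proc_queue = to_proc_queue
         self.res_queue = res_queue

     def run(self):
         # Assumed consumer loop: fetch URLs until a None sentinel arrives.
         while True:
             url = self.to_proc_queue.get()
             if url is None:
                 break
             self.res_queue.put((url, self.fetcher.get(url)))

 to_proc, results = Queue(), Queue()
 worker = FetchWorker(to_proc, results)
 worker.start()
 to_proc.put("http://example.com")
 to_proc.put(None)  # sentinel: tells the worker to stop
 worker.join()
 print(results.get())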
Example #2
 def scrape(self):
     """
         begins iterating through every page of results and returns
         a list of company objects for each
     """
     url = "http://www.yellowpages.com/search?search_terms={0}&geo_location_terms={1} {2}&page={3}"
     fetcher = getFetcher(js=self.js)
     companies = []
     page = 1
     while True:
         res = bs(fetcher.get(url.format(self.occ, self.city, self.state, page)), 'html.parser')
         cs = self._parse_yp_response(res)
         if not cs:
             # an empty result page means there is nothing left to scrape
             break
         companies += cs
         page += 1
     fetcher.teardown()
     return companies
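As a point of reference, the same page-walking pattern can be written as a stand-alone function. The sketch below assumes the requests library for fetching and uses an illustrative "a.business-name" selector in place of the project's _parse_yp_response; neither detail comes from the original class.

 import requests
 from bs4 import BeautifulSoup as bs

 SEARCH_URL = "http://www.yellowpages.com/search?search_terms={0}&geo_location_terms={1} {2}&page={3}"

 def scrape_names(occ, city, state):
     """Walk result pages until one comes back empty; return the listing names."""
     names, page = [], 1
     while True:
         html = requests.get(SEARCH_URL.format(occ, city, state, page)).text
         soup = bs(html, "html.parser")
         # "a.business-name" is an illustrative selector, not taken from the original code.
         found = [a.get_text(strip=True) for a in soup.select("a.business-name")]
         if not found:
             break
         names += found
         page += 1
     return names

 print(scrape_names("plumber", "Denver", "CO")[:5])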