def run(self):
    timer = Timer()
    timer.start()

    # Build one search url per result page (10 results per page, offset-based)
    search_urls = []
    search_filter = Filter(self.filter, self.startpage, self.maxpages)
    for page in range(self.startpage, self.startpage + self.maxpages):
        search_urls.append(search_filter.create_filter_url((page - 1) * 10))

    # Create a pool of worker threads
    pool = ThreadPool(4)

    # Fetch each url in its own worker thread
    organisaties = pool.map(unwrap_self_process_search,
                            zip([self] * len(search_urls), search_urls))
    pool.close()
    pool.join()

    results = {}
    results["organisaties"] = self.consolidate(organisaties)

    timer.stop()
    results["stats"] = {
        "exectime": timer.exectime(),
        "matches": {
            "total": str(self.search_results["results"]),
            "pages": str(self.search_results["pages"])
        },
        "read": {
            "page_from": str(self.startpage),
            "page_to": str(self.maxpages)
        }
    }

    return results
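# run() maps a module-level helper over (instance, url) pairs because
# pool.map expects a plain function rather than a bound method. A minimal
# sketch of that helper, assuming the enclosing class is called Scraper and
# defines a process_search(self, search_url) method (both names are
# assumptions, not confirmed by this file); ThreadPool itself comes from the
# standard library:

from multiprocessing.pool import ThreadPool


def unwrap_self_process_search(arg):
    # arg is one (Scraper instance, search_url) tuple produced by zip() in run()
    instance, search_url = arg
    return instance.process_search(search_url)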
def __init__(self, filter, startpage, maxpages):
    self.filter = filter
    self.startpage = startpage
    self.maxpages = maxpages

    # Run an initial query to learn how many results and pages are available
    search_filter = Filter(self.filter, self.startpage, self.maxpages)
    self.search_url = search_filter.create_filter_url(self.startpage)
    handler = Handler(self.search_url)
    self.search_results = handler.init()

    if self.search_results["pages"] < startpage:
        raise Exception("Error: startpage exceeds available pages [pages=" +
                        str(self.search_results["pages"]) + "]")

    # Clamp maxpages so the last page requested never exceeds the available pages
    if self.search_results["pages"] < startpage + maxpages:
        self.maxpages = self.search_results["pages"] - startpage + 1
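# A minimal usage sketch, assuming the two methods above live on a class
# named Scraper and that the filter string follows the project's own query
# syntax (class name and filter value are illustrative assumptions):

if __name__ == "__main__":
    scraper = Scraper("handelsnaam:bakkerij", startpage=1, maxpages=3)
    results = scraper.run()
    print("pages read:", results["stats"]["read"]["page_to"])
    print("organisaties found:", len(results["organisaties"]))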