def run(self):
     """Process every requested search page on a thread pool and gather the results.

     One search URL is built per page in ``[startpage, startpage + maxpages)``.
     The URLs are handed to ``unwrap_self_process_search`` through a
     ``ThreadPool`` of size 4 and the per-page results are passed to
     ``self.consolidate``.

     Returns:
         dict: ``"organisaties"`` holds the value returned by
         ``self.consolidate``; ``"stats"`` holds the timer's execution time,
         the total match/page counts from ``self.search_results`` and the
         first and last page numbers that were requested (as strings).
     """
     timer = Timer()
     timer.start()

     # maxpages is a page *count* (see the range below), so the last page
     # actually requested is startpage + maxpages - 1.
     last_page = self.startpage + self.maxpages - 1

     # The URL builder is given a result offset, (page - 1) * 10, per page.
     url_filter = Filter(self.filter, self.startpage, self.maxpages)
     search_urls = [
         url_filter.create_filter_url((page - 1) * 10)
         for page in range(self.startpage, last_page + 1)
     ]

     # Open the urls in their own threads. close()/join() sit in a finally
     # clause so the workers are released even when a page fails.
     pool = ThreadPool(4)
     try:
         organisaties = pool.map(
             unwrap_self_process_search,
             [(self, url) for url in search_urls],
         )
     finally:
         pool.close()
         pool.join()

     results = {"organisaties": self.consolidate(organisaties)}

     timer.stop()

     results["stats"] = {
         "exectime": timer.exectime(),
         "matches": {
             "total": str(self.search_results["results"]),
             "pages": str(self.search_results["pages"]),
         },
         "read": {
             "page_from": str(self.startpage),
             # Report the last page read, not the page count.
             "page_to": str(last_page),
         },
     }

     return results
 def __init__(self, filter, startpage, maxpages):
     """Store the search parameters and run an initial query for the totals.

     Args:
         filter: Search filter, forwarded unchanged to ``Filter``.
         startpage: First page number to read.
         maxpages: Maximum number of pages to read, counted from
             ``startpage``. Reduced to the number of pages actually
             available from ``startpage`` onwards when it is too large.

     Raises:
         Exception: If ``startpage`` lies beyond the number of pages
             reported by the initial query.
     """
     self.filter = filter
     self.startpage = startpage
     self.maxpages = maxpages

     # Separate local name so the ``filter`` argument is not rebound.
     url_filter = Filter(self.filter, self.startpage, self.maxpages)
     self.search_url = url_filter.create_filter_url(self.startpage)
     handler = Handler(self.search_url)
     self.search_results = handler.init()

     available_pages = self.search_results["pages"]
     if available_pages < startpage:
         raise Exception("Error: startpage exceeds available pages [pages=" + str(available_pages) + "]")

     # maxpages is a count starting at startpage, so the clamp must be the
     # number of pages left from startpage on, not the total page count;
     # otherwise a startpage > 1 would still run past the last page.
     remaining_pages = available_pages - startpage + 1
     if maxpages > remaining_pages:
         self.maxpages = remaining_pages
Exemple #3
0
    def __init__(self, filter, startpage, maxpages):
        """Record the search parameters and query the site once for totals.

        Args:
            filter: Search filter, forwarded unchanged to ``Filter``.
            startpage: First page number to read.
            maxpages: Maximum number of pages to read, counted from
                ``startpage``. Capped at the number of pages that exist
                from ``startpage`` onwards.

        Raises:
            Exception: If ``startpage`` is greater than the page count
                reported by the initial query.
        """
        self.filter = filter
        self.startpage = startpage
        self.maxpages = maxpages

        # Use a distinct local name instead of rebinding the ``filter``
        # argument (which also shadows the builtin).
        url_filter = Filter(self.filter, self.startpage, self.maxpages)
        self.search_url = url_filter.create_filter_url(self.startpage)
        self.search_results = Handler(self.search_url).init()

        total_pages = self.search_results["pages"]
        if total_pages < startpage:
            raise Exception(
                "Error: startpage exceeds available pages [pages=" +
                str(total_pages) + "]")

        # Pages startpage..total_pages inclusive are readable. Capping at
        # total_pages itself would over-read whenever startpage > 1, because
        # maxpages is consumed as a count relative to startpage.
        pages_left = total_pages - startpage + 1
        if maxpages > pages_left:
            self.maxpages = pages_left
Exemple #4
0
    def run(self):
        """Read the requested search pages concurrently and return a summary.

        A search URL is created for each page in
        ``[startpage, startpage + maxpages)``; the URLs are mapped over
        ``unwrap_self_process_search`` on a ``ThreadPool`` of size 4 and the
        resulting list is given to ``self.consolidate``.

        Returns:
            dict: ``"organisaties"`` is the value returned by
            ``self.consolidate``. ``"stats"`` contains the timer's
            ``exectime``, the ``results``/``pages`` totals from
            ``self.search_results`` and the first and last page numbers
            requested, the latter four rendered as strings.
        """
        timer = Timer()
        timer.start()

        # maxpages counts pages from startpage, so this is the final page
        # number that will be requested.
        last_page = self.startpage + self.maxpages - 1

        # Create list of search urls; each is addressed by result offset
        # ((page - 1) * 10), not by page number.
        url_filter = Filter(self.filter, self.startpage, self.maxpages)
        search_urls = [
            url_filter.create_filter_url((page - 1) * 10)
            for page in range(self.startpage, last_page + 1)
        ]

        # Create pool of worker threads and open the urls in their own
        # threads. The pool is always closed and joined, even if a worker
        # raises, so no threads are left behind.
        pool = ThreadPool(4)
        try:
            organisaties = pool.map(
                unwrap_self_process_search,
                [(self, url) for url in search_urls],
            )
        finally:
            pool.close()
            pool.join()

        results = {"organisaties": self.consolidate(organisaties)}

        timer.stop()

        results["stats"] = {
            "exectime": timer.exectime(),
            "matches": {
                "total": str(self.search_results["results"]),
                "pages": str(self.search_results["pages"]),
            },
            "read": {
                "page_from": str(self.startpage),
                # Last page read; self.maxpages alone is only a count.
                "page_to": str(last_page),
            },
        }

        return results