def run(self):
    """Keeper thread main loop: drain the question queue into the database.

    Pulls questions off ``self.question_queue`` and persists each via
    ``self.save``, committing every ``CupConfig.inserts_per_transaction``
    inserts (and whenever the queue goes idle for the configured timeout).
    Increments the module-global ``question_count`` per saved question.
    Runs until the thread is killed; commit/close cleanup is guaranteed
    via ``finally`` even if the loop dies on an unexpected error.
    """
    global question_count
    logging.debug("Keeper_%s_%s",
                  threading.current_thread().name,
                  str(threading.current_thread().ident))
    self._db_manager = CupDBManager()
    try:
        self._db_manager.create_database()
    except Exception:
        # Database likely already exists; best-effort create is intentional.
        # (Was a bare except, which would also swallow KeyboardInterrupt.)
        pass
    try:
        while True:
            logging.debug("Total questions:%s", str(question_count))
            try:
                question = self.question_queue.get(
                    timeout=CupConfig.queue_timeout_in_second)
            except Exception:
                # Queue idle (Queue.Empty on timeout): flush pending work
                # so nothing sits uncommitted while we wait for producers.
                self._db_manager.commit()
                continue
            self.save(question)
            question_count += 1
            self.question_queue.task_done()
            # Batch inserts into transactions to amortize commit cost.
            if self.pending_inserts > CupConfig.inserts_per_transaction:
                self._db_manager.commit()
                self.pending_inserts = 0
    finally:
        # Original placed these after ``while True`` where they were
        # unreachable; run them on any exit path instead.
        self._db_manager.commit()
        self._db_manager.close()
def run(self):
    """Miner thread main loop.

    Repeatedly takes a fetched page off ``self.page_queue``, parses the
    questions out of it, and hands each one to the module-level
    ``question_queue`` for the Keeper thread to persist.
    """
    logging.debug("_".join(["Miner",
                            threading.current_thread().name,
                            str(threading.current_thread().ident)]))
    while True:
        raw_page = self.page_queue.get()
        # Parser yields zero or more question records per page.
        for parsed in CareerCupParser.parse(raw_page):
            question_queue.put(parsed)
        self.page_queue.task_done()
def run(self):
    """Fetcher thread main loop: turn URLs into page bodies.

    For each URL from ``self.url_queue``: serve the page from the
    module-global ``__cache__`` when possible, otherwise fetch it with
    ``self.browser``. Transient failures are retried up to
    ``CupConfig.retries`` times by re-queueing the URL; retry counts are
    kept in the module-global ``tempts`` keyed by url_id. Fetched pages
    go to ``self.page_queue``. Sleeps between iterations to rate-limit.

    Fixes vs original: the cache-miss path no longer pushes ``page``
    (still ``None``) onto ``page_queue`` and calls ``task_done()``
    before the fetch even runs — that fed ``None`` downstream and
    over-called ``task_done`` (ValueError). Retry bookkeeping now uses
    ``tempts`` consistently: the original tested ``url not in tempts``
    but only ever wrote ``retried_urls[url_id]``, so ``tempts`` was
    never seeded and failed URLs were re-queued forever.
    """
    global __cache__
    logging.debug("Fetcher_%s_%s",
                  threading.current_thread().name,
                  str(threading.current_thread().ident))
    while True:
        url = self.url_queue.get()
        logging.info("Fetching " + url)
        url_id = CareerCupParser.url_id(url)
        if url_id in __cache__:
            logging.debug("Cache hit " + url)
            self.page_queue.put(__cache__[url_id])
            self.url_queue.task_done()
        else:
            req = urllib2.Request(url)
            try:
                page = self.browser.open(req).read()
                self.page_queue.put(page)
                self.url_queue.task_done()
            except Exception as e:
                if isinstance(e, urllib2.HTTPError) and \
                        e.code in [401, 403, 404, 501, 503]:
                    # logging.warn is deprecated; warning() is canonical.
                    logging.warning("Failed to fetch page: " + url +
                                    ", error code" + str(e.code))
                # Retry bookkeeping, keyed by url_id in ``tempts``.
                if url_id not in tempts:
                    tempts[url_id] = 1
                    retried_urls[url_id] = 1  # preserved for external readers
                    self.url_queue.put(url)
                elif tempts[url_id] < CupConfig.retries:
                    tempts[url_id] += 1
                    self.url_queue.put(url)
                else:
                    logging.warning("Failed to fetch page: " + url +
                                    " after " + str(CupConfig.retries) +
                                    " retries")
                self.url_queue.task_done()
        # Rate-limit requests to the remote site.
        time.sleep(CupConfig.fetch_interval_in_second)
def test_joining_current_thread(self): current_thread = threading.current_thread() self.assertRaises(RuntimeError, current_thread.join);
def f(mutex):
    """Register this thread with the threading module, then release *mutex*.

    The call to ``threading.current_thread()`` is deliberate: for a
    foreign (non-threading-created) thread it forces an entry into the
    ``threading._active`` map before the mutex is released.
    """
    threading.current_thread()  # side effect only; return value unused
    mutex.release()
def test_joining_current_thread(self): current_thread = threading.current_thread() self.assertRaises(RuntimeError, current_thread.join)