Ejemplo n.º 1
0
    def run(self):
        """Consume questions from ``self.question_queue`` and hand them to ``self.save``.

        A ``CupDBManager`` is created for this thread and committed in
        batches: whenever ``self.pending_inserts`` exceeds
        ``CupConfig.inserts_per_transaction``, and whenever no question
        arrives within ``CupConfig.queue_timeout_in_second``.

        The loop has no normal exit.  If it is left through an exception,
        the outstanding work is committed and the database handle is closed
        before the exception propagates.
        """
        global question_count
        logging.debug("Keeper_%s_%s", threading.current_thread().name,
                      threading.current_thread().ident)

        self._db_manager = CupDBManager()
        try:
            self._db_manager.create_database()
        except Exception:
            # Still best effort (presumably the database may already exist --
            # TODO confirm), but leave a trace instead of hiding the failure.
            logging.debug("create_database() failed; continuing",
                          exc_info=True)

        try:
            while True:
                logging.debug("Total questions:%s", question_count)
                try:
                    question = self.question_queue.get(
                        timeout=CupConfig.queue_timeout_in_second)
                except Exception:
                    # Nothing arrived within the timeout: use the idle time
                    # to flush whatever has been saved so far.
                    self._db_manager.commit()
                    self.pending_inserts = 0
                    continue

                self.save(question)
                question_count += 1
                self.question_queue.task_done()
                if self.pending_inserts > CupConfig.inserts_per_transaction:
                    self._db_manager.commit()
                    self.pending_inserts = 0
        finally:
            # Reached only when the loop is aborted by an exception; make
            # sure the handle is closed even if the final commit fails.
            try:
                self._db_manager.commit()
            finally:
                self._db_manager.close()
Ejemplo n.º 2
0
 def run(self):
     """Parse pages from ``self.page_queue`` and enqueue the resulting questions.

     Each page is passed to ``CareerCupParser.parse`` and every item it
     yields is put on ``question_queue``.  The loop never returns.
     """
     logging.debug("Miner_%s_%s", threading.current_thread().name,
                   threading.current_thread().ident)
     while True:
         page = self.page_queue.get()
         try:
             # NOTE(review): ``question_queue`` is resolved as a global here,
             # not as ``self.question_queue`` -- confirm that is intended.
             for q in CareerCupParser.parse(page):
                 question_queue.put(q)
         finally:
             # Every get() must be matched by a task_done(), even when
             # parsing fails, otherwise page_queue.join() never returns.
             self.page_queue.task_done()
Ejemplo n.º 3
0
    def run(self):
        """Fetch URLs from ``self.url_queue`` and put page bodies on ``self.page_queue``.

        For every URL:

        * if ``CareerCupParser.url_id(url)`` is a key of ``__cache__``, the
          cached page is forwarded without touching the network;
        * otherwise the page is downloaded with ``self.browser``; on failure
          the URL is put back on the queue until it has been re-queued
          ``CupConfig.retries`` times, after which it is dropped with a
          warning.

        ``task_done()`` is called exactly once per ``get()``, and the thread
        sleeps ``CupConfig.fetch_interval_in_second`` between URLs.  The loop
        never returns.
        """
        global __cache__
        logging.debug("Fetcher_%s_%s", threading.current_thread().name,
                      threading.current_thread().ident)
        while True:
            url = self.url_queue.get()
            try:
                logging.info("Fetching %s", url)
                url_id = CareerCupParser.url_id(url)

                if url_id in __cache__:
                    logging.debug("Cache hit %s", url)
                    self.page_queue.put(__cache__[url_id])
                else:
                    try:
                        # Keep the try body to the one call that is expected
                        # to fail, so queue errors are not mistaken for
                        # fetch errors.
                        page = self.browser.open(urllib2.Request(url)).read()
                    except Exception as e:
                        if (isinstance(e, urllib2.HTTPError) and
                                e.code in [401, 403, 404, 501, 503]):
                            logging.warning(
                                "Failed to fetch page: %s, error code%s",
                                url, e.code)
                        # ``tempts`` is defined outside this block --
                        # presumably a shared mapping of url_id to the number
                        # of failed attempts; verify against the module.
                        if url_id not in tempts:
                            tempts[url_id] = 1
                            self.url_queue.put(url)
                        elif tempts[url_id] < CupConfig.retries:
                            tempts[url_id] += 1
                            self.url_queue.put(url)
                        else:
                            logging.warning(
                                "Failed to fetch page: %s after %s retries",
                                url, CupConfig.retries)
                    else:
                        self.page_queue.put(page)
            finally:
                # Runs after any re-queue above, so the unfinished-task count
                # cannot drop to zero while a retry is still pending.
                self.url_queue.task_done()

            time.sleep(CupConfig.fetch_interval_in_second)
Ejemplo n.º 4
0
 def test_joining_current_thread(self):
     current_thread = threading.current_thread()
     self.assertRaises(RuntimeError, current_thread.join);
Ejemplo n.º 5
0
 def f(mutex):
     """Register the calling thread with ``threading``, then release *mutex*."""
     # The return value is deliberately discarded: the call is made only so
     # that an entry for the foreign thread is created in the
     # threading._active map.
     threading.current_thread()
     mutex.release()
Ejemplo n.º 6
0
 def test_joining_current_thread(self):
     current_thread = threading.current_thread()
     self.assertRaises(RuntimeError, current_thread.join)
Ejemplo n.º 7
0
 def f(mutex):
     """Touch ``threading.current_thread()`` and then release *mutex*."""
     # Called purely for its side effect: it forces an entry for the
     # foreign thread to be made in the threading._active map.
     threading.current_thread()
     mutex.release()