Ejemplo n.º 1
0
	def __init__(self, generation, mgr_interval=5):
		'''Create the in-memory work store.

		generation   -- stored as self.gen.
		mgr_interval -- stored as self.mgr_interval (default 5).
		'''
		settings = Settings.getInstance()
		self.settings = settings
		self.debug = settings.debug

		self.gen, self.mgr_interval = generation, mgr_interval

		# Bounded person queue plus a set kept in sync with it, so membership
		# can be tested quickly by id.
		self.person_queue = Queue.Queue(maxsize=settings.person_cache_size)
		self.person_id_set = set()

		# Publication bookkeeping.
		self.pubmap = dict()			# {id -> pub}
		self.person_pub_map = dict()	# {person_id -> [pub_id_list]}
		self.pub_db_cache = dict()

		# Locks for the publication structures above.
		self.pub_lock = threading.Lock()
		self.pub_dbcache_lock = threading.RLock()

		# Kept in sync with the main running flag by the management thread
		# (per the original note).
		self.running = True
		self.blocked_pub_t = 0

		# Time sums.
		self.ppt_wait = self.ppt_getlock = self.ppt_get = 0

		self.person_dao = PersonDao()
		self.pub_dao = PublicationDao()
Ejemplo n.º 2
0
    def __init__(self):
        '''Set up configuration, thread bookkeeping, control flags and DAOs.

        Prints a start-up banner and finishes by calling
        determineGereration().
        '''
        # Banner host name fixed: "scholar", not "schooler".
        print("Task: extract paper's citation from scholar.google.com.\n")
        self.settings = Settings.getInstance()
        self.debug = self.settings.debug

        # Configs
        self.mgr_interval = 10  # seconds
        self.max_person_thread = 2  # max threads used to extract persons
        self.max_pub_thread = 2  # both limits can be modified on the fly (day vs. night)

        # Threads and configurations
        self.t_mgr = None  # management thread
        self.t_provider = None
        self.person_thread_pool = []
        self.pub_thread_pool = []

        self.busy_semaphore = 0  # used to monitor whether all threads are idle
        self.busy_semaphore_lock = threading.Lock()  # used to monitor whether all threads are idle

        # utils
        self.store = None

        # switchers & flags
        self.running = True  # If False, threads will stop after current task.
        self.stopped = False  # If MGRThread can stop.
        self.pause = False  # All works paused.
        self.waiting_to_finish = False  # No additional data. all added to queue.
        self.num_report = 0
        self.last_report_time = datetime.datetime.now()  # time of the last interval

        self.restart_all_thread = False
        # Right after leaving pause mode there are many failed tasks, which
        # would otherwise trigger waiting again immediately.
        self.detect_exit_wait = 0

        self.generation = 0

        self.dao = dbs()
        self.personDao = PersonDao()
        self.pubDao = PublicationDao()

        # Always define the attribute so it exists even when PDF-link saving
        # is switched off.
        self.pdfcache = None
        if self.settings.save_pdflink:
            self.pdfcache = PDFLinkSaver.getInstance()

        # start
        self.determineGereration()
Ejemplo n.º 3
0
    def __init__(self):
        '''Set up configuration, thread bookkeeping, control flags and DAOs.

        Prints a start-up banner and finishes by calling
        determineGereration().
        '''
        # Banner host name fixed: "scholar", not "schooler".
        print("Task: extract paper's citation from scholar.google.com.\n")
        self.settings = Settings.getInstance()
        self.debug = self.settings.debug

        # Configs
        self.mgr_interval = 10  # seconds
        self.max_person_thread = 2  # max threads used to extract persons
        self.max_pub_thread = 2  # both limits can be modified on the fly (day vs. night)

        # Threads and configurations
        self.t_mgr = None  # management thread
        self.t_provider = None
        self.person_thread_pool = []
        self.pub_thread_pool = []

        self.busy_semaphore = 0  # used to monitor whether all threads are idle
        self.busy_semaphore_lock = threading.Lock()  # used to monitor whether all threads are idle

        # utils
        self.store = None

        # switchers & flags
        self.running = True  # If False, threads will stop after current task.
        self.stopped = False  # If MGRThread can stop.
        self.pause = False  # All works paused.
        self.waiting_to_finish = False  # No additional data. all added to queue.
        self.num_report = 0
        self.last_report_time = datetime.datetime.now()  # time of the last interval

        self.restart_all_thread = False
        # Right after leaving pause mode there are many failed tasks, which
        # would otherwise trigger waiting again immediately.
        self.detect_exit_wait = 0

        self.generation = 0

        self.dao = dbs()
        self.personDao = PersonDao()
        self.pubDao = PublicationDao()

        # Always define the attribute so it exists even when PDF-link saving
        # is switched off.
        self.pdfcache = None
        if self.settings.save_pdflink:
            self.pdfcache = PDFLinkSaver.getInstance()

        # start (runs unconditionally; it is not part of the `if` above)
        self.determineGereration()
Ejemplo n.º 4
0
class GoogleScholarExtractor:
	'''Author gb<elivoa[AT]gmail.com> v0.4.0'''

	def __init__(self):
		'''Set up counters, thread bookkeeping, control flags and DAOs.

		Prints a start-up banner and finishes by calling
		determineGereration().
		'''
		print("Task: extract paper's citation from scholar.google.com.\n")
		self.settings = Settings.getInstance()
		self.debug = self.settings.debug

		# Worker statistics shown in the management report.
		self.threadChildren = 0
		self.PersonThreadActive = 0
		self.PubThreadActive = 0

		# Threads and their pools.
		self.mgr_interval = 8	# seconds
		self.t_mgr = None		# management thread
		self.t_provider = None
		self.person_thread_pool = list()
		self.pub_thread_pool = list()

		# Used to monitor whether all threads are idle.
		self.busy_semaphore = 0
		self.busy_semaphore_lock = threading.Lock()

		self.busy_person_semaphore = 0
		self.busy_pub_semaphore = 0

		self.store = None

		# Switches and flags.
		self.running = True				# If False, threads will stop after current task.
		self.stopped = False			# If MGRThread can stop.
		self.pause = False				# All works paused.
		self.waiting_to_finish = False	# No additional data. all added to queue.
		self.num_report = 0
		self.last_report_time = datetime.datetime.now()	# time of the last interval

		self.restart_all_thread = False
		# Right after leaving pause mode there are many failed tasks, which
		# would otherwise trigger waiting again immediately.
		self.detect_exit_wait = 0

		self.generation = 0

		self.dao = dbs()
		self.personDao = PersonDao()
		self.pubDao = PublicationDao()

		if self.settings.save_pdflink:
			self.pdfcache = PDFLinkSaver.getInstance()

		self.determineGereration()

	# Decide which update generation to work on: continue the unfinished one or start a new one.
	def determineGereration(self):
		'''Choose the update generation to work on and print a summary.

		Loads the required generation and the min/max generation from
		self.dao, then:
		  * keeps the generation when it is ahead of the DB maximum, or
		    equals the maximum while the minimum is still behind it;
		  * moves to generation + 1 (and stores it) when min, max and the
		    required generation are all equal;
		  * otherwise falls back to the DB maximum (and stores it).
		Finally prints person and publication progress.
		'''
		dao = self.dao
		self.generation = dao.getGeneration()
		min_gen = dao.getMinGenerationInDB()
		max_gen = dao.getMaxGenerationInDB()

		banner = '====================================================================='
		print(banner)
		print(" * Required update_generation is: [ %s ]." % self.generation)
		print(" * Current min update_generation is: [ %s ]." % min_gen)

		# process generation
		gen = self.generation
		if (min_gen < gen and gen == max_gen) or gen > max_gen:
			print(" * Not finished task, continue to finish current generation.")
		elif gen == min_gen and min_gen == max_gen:
			print(" * Just start new generation")
			self.generation = gen + 1
			dao.setGeneration(self.generation)
		else:
			print("=== Error: generation(%s) bigger than currentMinGen(%s)" % (gen, min_gen))
			self.generation = max_gen
			dao.setGeneration(self.generation)

		# count task progress
		person_line = self.reportPersonProgress(self.generation)
		print(" * Process NA Persons : %s." % person_line)
		pub_line = self.reportPublicationProgress(self.generation)
		print(" * Process Publication: %s." % pub_line)
		print(banner)


	def reportPersonProgress(self, udpate_generation):
		'''Return a progress string for persons, e.g. "[ 60.00%] 6/10".

		udpate_generation -- generation passed to
		    personDao.getPersonLeftCount(); the misspelt parameter name is
		    kept so keyword callers keep working.

		When the total count is 0 the percentage is reported as 0.00
		instead of raising ZeroDivisionError.
		'''
		total = self.personDao.getPersonTotalCount()
		left = self.personDao.getPersonLeftCount(udpate_generation)
		done = total - left
		# Guard the division: an empty table must not crash start-up.
		progress = float(done) / total * 100.0 if total else 0.0
		return "[%6.2f%%] %s/%s" % (progress, done, total)

	
	def reportPublicationProgress(self, udpate_generation):
		'''Return a progress string for publications, e.g. "[ 75.00%] 6/8".

		udpate_generation -- generation passed to pubDao.getLeftCount();
		    the misspelt parameter name is kept so keyword callers keep
		    working.

		When the total count is 0 the percentage is reported as 0.00
		instead of raising ZeroDivisionError.
		'''
		total = self.pubDao.getTotalCount()
		left = self.pubDao.getLeftCount(udpate_generation)
		done = total - left
		# Guard the division: an empty table must not crash start-up.
		progress = float(done) / total * 100.0 if total else 0.0
		return "[%6.2f%%] %s/%s" % (progress, done, total)


	def start(self):
		'''Run the multithreaded citation extraction until the manager exits.

		Creates the Store, starts the management thread (mgrThreadBody)
		and the ProviderThread, then blocks until the management thread
		has finished.
		'''
		self.store = Store(self.generation, self.mgr_interval)

		# Management thread runs the bound method mgrThreadBody.
		manager = threading.Thread(target=self.mgrThreadBody, name='thread-mgr')
		self.t_mgr = manager
		manager.start()

		provider = ProviderThread(self, None)
		self.t_provider = provider
		provider.start()

		# Only the management thread is waited for.
		manager.join()
		print("============ ALL END ============")


	def wait_for_pause(self):
		'''Block while self.pause is set, re-checking every mgr_interval seconds.'''
		while True:
			if not self.pause:
				return
			time.sleep(self.mgr_interval)

	#
	# Management Thread
	#
	def mgrThreadBody(self):
		'''Management thread body: supervise workers and print a status report.

		Loops until self.running is False and self.stopped is True, sleeping
		self.mgr_interval seconds between passes. Each pass:
		  1. counts pool threads whose check_idle() is true,
		  2. flushes the PDF-link cache when settings.save_pdflink is set,
		  3. calls _maintainThreadPool() to (re)start workers,
		  4. clears self.running once _checkFinishCondition() holds,
		  5. sets self.stopped when the pool check reported no live worker,
		  6. prints a report (mailed when num_report is a multiple of 100),
		  7. flushes the store's DB cache.
		Steps wrapped in try/except are best-effort: the caught exception is
		printed and the loop carries on.
		'''
		print("#init:> start mgr & provider.")
		getter = HtmlRetriever.getInstance(self.settings.use_proxy)

		# Last known worker counts. Bound up front so that a failing
		# _maintainThreadPool() cannot cause a NameError further down.
		num_persont_alive = 0
		num_pubt_alive = 0

		while self.running or not self.stopped:
			# interval seconds passed.
			interval_seconds = (datetime.datetime.now() - self.last_report_time).seconds
			if interval_seconds == 0: interval_seconds = 1
			self.last_report_time = datetime.datetime.now()

			try:
				self.PersonThreadActive = 0
				self.PubThreadActive = 0
				for x in self.person_thread_pool:
					if x.check_idle():
						self.PersonThreadActive += 1
				for y in self.pub_thread_pool:
					if y.check_idle():
						self.PubThreadActive += 1
			except Exception as e:
				print("ERROR:count errer")
				print(e)	# the caught instance, not the Exception class

			try:
				# save pdf link
				if self.settings.save_pdflink:
					self.pdfcache.flush()
			except Exception as e:
				print("ERROR: pdf link")
				print(e)

			message = None

			# When to restart all threads & processes.
			# NOTE: the flag is computed but deliberately not passed on below
			# (False is hard-coded), so only the message is affected.
			reload_all_thread = False
			if self.num_report % 1000 == 0:
				reload_all_thread = True
				message = "Kill & Restart All Thread."

			pool_checked = False
			try:
				# Maintain Threads and get worker threads status.
				(num_persont_alive, num_pubt_alive) = self._maintainThreadPool(reload_all_thread=False)
				pool_checked = True
			except Exception as e:
				print("ERROR: maintain threads and worker")
				print(e)

			try:
				# Finish Condition.
				if self._checkFinishCondition():
					self.running = False					# -> tell all threads finish.
					message = "MESSAGE! Send terminal signal to all worker thread."
			except Exception as e:
				print("ERROR: condition check")
				print(e)

			# if all worker threads stopped, mgrThread can stop.
			# Only trusted when this pass's pool check actually succeeded.
			if pool_checked and num_persont_alive == 0 and num_pubt_alive == 0:
				self.stopped = True
				message = "Send terminal signal to mgr_thread."

			# check network and count
			period_success_connection = getter.success_connection_count - getter.last_success_connection_count
			period_bad_connection = getter.bad_connection_count - getter.last_bad_connection_count
			total_connections = period_success_connection + period_bad_connection
			getter.last_success_connection_count = getter.success_connection_count
			getter.last_bad_connection_count = getter.bad_connection_count

			average_success_persecond = period_success_connection / float(interval_seconds)
			average_bad_persecond = period_bad_connection / float(interval_seconds)

			if False: # block mode (pause the whole program) -- currently disabled
				if getter.detect_mode:
					if getter.detect_success_count > 3:
						getter.leave_detect_mode()
						self.detect_exit_wait = 1 # right after leaving, do not re-enter block mode for the next rounds
				else:
					if total_connections * 0.9 < period_bad_connection:
						if self.detect_exit_wait > 0:
							print("---- waiting %s rounds ----" % self.detect_exit_wait)
							self.detect_exit_wait -= 1
						else:
							getter.enter_detect_mode()

			################ print interval string ################
			try:
				# print report
				if not getter.detect_mode:
					str_report = None
					if not self.pause:
						self.num_report += 1
						str_report = self.num_report
					else:
						str_report = "paused"

					report_strs = []
					report_strs.append("-" * 100)
					report_strs.append("\n")
					report_strs.append("$&mgr:%s(%s):> " % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), str_report))
					report_strs.append("Person(%sT on %s), " % (num_persont_alive, self.store.person_queue.qsize()))
					report_strs.append("Pub(%sT on %s, %s items), " % (num_pubt_alive, len(self.store.pubmap), len(self.store.person_pub_map)))
					report_strs.append("DBCache({{{ %s }}}), " % len(self.store.pub_db_cache))
					report_strs.append("T(busy/idle)(%s/%s), " % (self.busy_semaphore, self.settings.max_person_thread + self.settings.max_pub_thread - self.busy_semaphore))
					report_strs.append("\n")	# was `+= '\n'`, which only worked because the string has one character
					report_strs.append("Person(busy/idle)(%s/%s), Pub(busy/idle)(%s/%s)" % (self.busy_person_semaphore, self.settings.max_person_thread-self.busy_person_semaphore, self.busy_pub_semaphore, self.settings.max_pub_thread-self.busy_pub_semaphore))
					g = getter.success_connection_count
					b = getter.bad_connection_count
					t = g + b
					rate = 0
					if(t > 0):
						rate = g / float(t)
					report_strs.append("network(g+b=t)=(%s+%s=%s),rate=%.2f " % (g, b , t, rate))
					report_strs.append("interval-network(g+b=t)=(%s+%s=%s), " % (period_success_connection, period_bad_connection, total_connections))
					report_strs.append("avg:(g%.1f b%.1f in %s seconds.), " % (average_success_persecond, average_bad_persecond, interval_seconds))
					report_strs.append("\n")
					report_strs.append("now have %s child threads, " % self.threadChildren)
					report_strs.append("active threads (%s person, %s pub) , " % (self.PersonThreadActive, self.PubThreadActive))
					report_strs.append("\n")
					report_strs.append("time:(wait=%.2f, getlock=%.2f, get=%.2f)" % (self.store.ppt_wait, self.store.ppt_getlock, self.store.ppt_get))
					if message is not None:
						report_strs.append("\n")
						report_strs.append(message)
					report_strs.append("\n")

					report_strs.append(" * Process NA Persons : %s.\n" % self.reportPersonProgress(self.generation))
					report_strs.append(" * Process Publication: %s.\n" % self.reportPublicationProgress(self.generation))
					report_strs.append("-" * 100)
					report_strs.append("\n")

					print("".join(report_strs))
					if (self.num_report%100 == 0):
						mr = MailReporter()
						mr.report(report_strs)
			except Exception as e:
				print("ERROR: report error")
				print(e)

			try:
				self.store.flushDBCache()				# last flush cache to db.
				self.store.running = self.running		# pass main running thread to Store object.
			except Exception as e:
				print("ERROR: flush db cache")
				print(e)

			time.sleep(self.mgr_interval) 			# interval

		print("$mgr:> exit.")

	def _checkFinishCondition(self):
		'''Return True when the whole run can stop, otherwise False.

		All of the following must hold, checked in this order: the provider
		has reported completion and the run is not paused; no thread is
		busy; the person queue, pubmap and pub_db_cache of the store are
		empty; and pubDao reports nothing left for this generation.
		'''
		# Provider must have reported finish, and we must not be paused.
		if not self.waiting_to_finish or self.pause:
			return False

		# All threads' status must be idle.
		if self.busy_semaphore != 0:
			return False

		# Task queues must be empty.
		store = self.store
		if not store.person_queue.empty():
			return False
		if len(store.pubmap) != 0:
			return False
		if len(store.pub_db_cache) != 0:
			return False

		# Really finished only when the database agrees.
		remaining = self.pubDao.getLeftCount(self.generation)
		if remaining == 0:
			return True
		return False
	
	
	def _adjustThreadNum(self):
		'''Set the thread limits on self.settings from the local hour of day.

		hour <= 9 -> 25 person / 75 pub; hour >= 22 -> 16 / 40;
		any other hour -> 22 / 60.
		'''
		hour = datetime.datetime.now().hour
		if 9 < hour < 22:
			limits = (22, 60)
		elif hour >= 22:
			limits = (16, 40)
		else:
			limits = (25, 75)
		self.settings.max_person_thread, self.settings.max_pub_thread = limits

		
	def _maintainThreadPool(self, reload_all_thread):
		'''Keep both worker pools populated and report how many workers are alive.

		reload_all_thread -- accepted for interface compatibility but
		    currently ignored: the "kill everything first" path was already
		    disabled in the original code.

		Returns (num_persont_alive, num_pubt_alive).
		'''
		num_persont_alive = self._refreshWorkerPool(
			self.person_thread_pool,
			self.settings.max_person_thread,
			lambda: PersonProcessThread(self),
			'person-thread-')
		num_pubt_alive = self._refreshWorkerPool(
			self.pub_thread_pool,
			self.settings.max_pub_thread,
			lambda: PubProcessThread(self),
			'pub-thread-')
		return (num_persont_alive, num_pubt_alive)

	def _refreshWorkerPool(self, pool, pool_size, make_thread, name_prefix):
		'''Pad one pool to pool_size, replace unhealthy workers, return the alive count.

		pool        -- list of worker threads (None marks an empty slot); it
		               is modified in place.
		pool_size   -- number of leading slots to manage.
		make_thread -- zero-argument callable returning a new, unstarted worker.
		name_prefix -- worker name prefix; the slot index is appended.

		A worker counts as healthy when is_alive() and check_idle() are both
		true. Empty or unhealthy slots are (re)filled only while
		self.running is set; otherwise they are left alone and not counted.
		'''
		# Fill the pool with empty slots up to the configured size.
		while len(pool) < pool_size:
			pool.append(None)

		alive = 0
		for idx in range(pool_size):
			t = pool[idx]
			if t is not None and t.is_alive() and t.check_idle():
				alive += 1
				continue
			if not self.running:
				continue

			if t is not None:
				# Existing but unhealthy worker: stop it before replacing.
				killedname = t.name
				t.stop()
				print("$mgr/thread:> kill thread %s" % killedname)

			t = make_thread()
			t.name = name_prefix + str(idx)
			pool[idx] = t
			t.start()
			with self.busy_semaphore_lock:
				self.threadChildren += 1
			alive += 1
		return alive
Ejemplo n.º 5
0
class GoogleScholarExtractor:
    '''Author gb<elivoa[AT]gmail.com> v0.4.0'''
    def __init__(self):
        '''Set up configuration, thread bookkeeping, control flags and DAOs.

        Prints a start-up banner and finishes by calling
        determineGereration().
        '''
        # Banner host name fixed: "scholar", not "schooler".
        print("Task: extract paper's citation from scholar.google.com.\n")
        self.settings = Settings.getInstance()
        self.debug = self.settings.debug

        # Configs
        self.mgr_interval = 10  # seconds
        self.max_person_thread = 2  # max threads used to extract persons
        self.max_pub_thread = 2  # both limits can be modified on the fly (day vs. night)

        # Threads and configurations
        self.t_mgr = None  # management thread
        self.t_provider = None
        self.person_thread_pool = []
        self.pub_thread_pool = []

        self.busy_semaphore = 0  # used to monitor whether all threads are idle
        self.busy_semaphore_lock = threading.Lock()  # used to monitor whether all threads are idle

        # utils
        self.store = None

        # switchers & flags
        self.running = True  # If False, threads will stop after current task.
        self.stopped = False  # If MGRThread can stop.
        self.pause = False  # All works paused.
        self.waiting_to_finish = False  # No additional data. all added to queue.
        self.num_report = 0
        self.last_report_time = datetime.datetime.now()  # time of the last interval

        self.restart_all_thread = False
        # Right after leaving pause mode there are many failed tasks, which
        # would otherwise trigger waiting again immediately.
        self.detect_exit_wait = 0

        self.generation = 0

        self.dao = dbs()
        self.personDao = PersonDao()
        self.pubDao = PublicationDao()

        # Always define the attribute so it exists even when PDF-link saving
        # is switched off.
        self.pdfcache = None
        if self.settings.save_pdflink:
            self.pdfcache = PDFLinkSaver.getInstance()

        # start
        self.determineGereration()

    # Decide which update generation to work on: continue the unfinished one or start a new one.
    def determineGereration(self):
        '''Choose the update generation to work on and print a summary.

        Loads the required generation and the min/max generation from
        self.dao, then:
          * keeps the generation when it is ahead of the DB maximum, or
            equals the maximum while the minimum is still behind it;
          * moves to generation + 1 (and stores it) when min, max and the
            required generation are all equal;
          * otherwise falls back to the DB maximum (and stores it).
        Finally prints person and publication progress.
        '''
        dao = self.dao
        self.generation = dao.getGeneration()
        min_gen = dao.getMinGenerationInDB()
        max_gen = dao.getMaxGenerationInDB()

        banner = '====================================================================='
        print(banner)
        print(" * Required update_generation is: [ %s ]." % self.generation)
        print(" * Current min update_generation is: [ %s ]." % min_gen)

        # process generation
        gen = self.generation
        if (min_gen < gen and gen == max_gen) or gen > max_gen:
            print(" * Not finished task, continue to finish current generation.")
        elif gen == min_gen and min_gen == max_gen:
            print(" * Just start new generation")
            self.generation = gen + 1
            dao.setGeneration(self.generation)
        else:
            print("=== Error: generation(%s) bigger than currentMinGen(%s)" % (gen, min_gen))
            self.generation = max_gen
            dao.setGeneration(self.generation)

        # count task progress
        person_line = self.reportPersonProgress(self.generation)
        print(" * Process NA Persons : %s." % person_line)
        pub_line = self.reportPublicationProgress(self.generation)
        print(" * Process Publication: %s." % pub_line)
        print(banner)

    def reportPersonProgress(self, udpate_generation):
        '''Return a progress string for persons, e.g. "[ 60.00%] 6/10".

        udpate_generation -- generation passed to
            personDao.getPersonLeftCount(); the misspelt parameter name is
            kept so keyword callers keep working.

        When the total count is 0 the percentage is reported as 0.00
        instead of raising ZeroDivisionError.
        '''
        total = self.personDao.getPersonTotalCount()
        left = self.personDao.getPersonLeftCount(udpate_generation)
        done = total - left
        # Guard the division: an empty table must not crash start-up.
        progress = float(done) / total * 100.0 if total else 0.0
        return "[%6.2f%%] %s/%s" % (progress, done, total)

    def reportPublicationProgress(self, udpate_generation):
        '''Return a progress string for publications, e.g. "[ 75.00%] 6/8".

        udpate_generation -- generation passed to pubDao.getLeftCount();
            the misspelt parameter name is kept so keyword callers keep
            working.

        When the total count is 0 the percentage is reported as 0.00
        instead of raising ZeroDivisionError.
        '''
        total = self.pubDao.getTotalCount()
        left = self.pubDao.getLeftCount(udpate_generation)
        done = total - left
        # Guard the division: an empty table must not crash start-up.
        progress = float(done) / total * 100.0 if total else 0.0
        return "[%6.2f%%] %s/%s" % (progress, done, total)

    def start(self):
        '''Run the multithreaded citation extraction until the manager exits.

        Creates the Store, starts the management thread (mgrThreadBody)
        and the ProviderThread, then blocks until the management thread
        has finished.
        '''
        self.store = Store(self.generation, self.mgr_interval)

        # Management thread runs the bound method mgrThreadBody.
        manager = threading.Thread(target=self.mgrThreadBody, name='thread-mgr')
        self.t_mgr = manager
        manager.start()

        provider = ProviderThread(self, None)
        self.t_provider = provider
        provider.start()

        # Only the management thread is waited for.
        manager.join()
        print("============ ALL END ============")

    def wait_for_pause(self):
        '''Block while self.pause is set, re-checking every mgr_interval seconds.'''
        while True:
            if not self.pause:
                return
            time.sleep(self.mgr_interval)

    #
    # Management Thread
    #
    def mgrThreadBody(self):
        '''Management thread body: supervise workers and print a status report.

        Loops until self.running is False and self.stopped is True, sleeping
        self.mgr_interval seconds between passes. Each pass:
          1. sets the thread limits (time-of-day schedule, then a hard
             override to 2/2),
          2. flushes the PDF-link cache when settings.save_pdflink is set,
          3. calls _maintainThreadPool() to (re)start workers,
          4. clears self.running once _checkFinishCondition() holds,
          5. sets self.stopped when the pool check reported no live worker,
          6. prints a status report,
          7. flushes the store's DB cache.
        Steps wrapped in try/except are best-effort: the caught exception is
        printed and the loop carries on.
        '''
        print("$init:> start mgr & provider.")
        getter = HtmlRetriever.getInstance(self.settings.use_proxy)

        # Last known worker counts. Bound up front so that a failing
        # _maintainThreadPool() cannot cause a NameError further down.
        num_persont_alive = 0
        num_pubt_alive = 0

        while self.running or not self.stopped:

            # interval seconds passed.
            interval_seconds = (datetime.datetime.now() -
                                self.last_report_time).seconds
            if interval_seconds == 0: interval_seconds = 1
            self.last_report_time = datetime.datetime.now()

            # --------------------------------------------------------
            # strength by period of day.
            hour = datetime.datetime.now().hour
            if hour <= 9:  # 00h-09h
                self.max_person_thread = 25
                self.max_pub_thread = 75
            elif 22 <= hour:  # 22h-23h
                self.max_person_thread = 16
                self.max_pub_thread = 40
            else:  # 10h-21h
                self.max_person_thread = 22
                self.max_pub_thread = 60

            # NOTE: hard override -- while these two lines stay, the
            # schedule above has no effect.
            self.max_person_thread = 2
            self.max_pub_thread = 2
            # --------------------------------------------------------

            # `except e:` in the original looked up an undefined name `e`,
            # turning every handled error into a NameError.
            try:
                # save pdf link
                if self.settings.save_pdflink:
                    self.pdfcache.flush()
            except Exception as e:
                print("ERROR: pdf link")
                print(e)

            # message
            message = None

            # When to restart all threads & processes.
            reload_all_thread = False
            if self.num_report % 1000 == 0:
                reload_all_thread = True
                message = "Kill & Restart All Thread."

            pool_checked = False
            try:
                # Maintain Threads and get worker threads status.
                (num_persont_alive,
                 num_pubt_alive) = self._maintainThreadPool(reload_all_thread)
                pool_checked = True
            except Exception as e:
                print("ERROR: maintain threads and worker")
                print(e)

            try:
                # Finish Condition.
                if self._checkFinishCondition():
                    self.running = False  # -> tell all threads finish.
                    message = "MESSAGE! Send terminal signal to all worker thread."
            except Exception as e:
                print("ERROR: condition check")
                print(e)

            # if all worker threads stopped, mgrThread can stop.
            # Only trusted when this pass's pool check actually succeeded.
            if pool_checked and num_persont_alive == 0 and num_pubt_alive == 0:
                self.stopped = True
                message = "Send terminal signal to mgr_thread."

            # check network and count
            period_success_connection = getter.success_connection_count - getter.last_success_connection_count
            period_bad_connection = getter.bad_connection_count - getter.last_bad_connection_count
            total_connections = period_success_connection + period_bad_connection
            getter.last_success_connection_count = getter.success_connection_count
            getter.last_bad_connection_count = getter.bad_connection_count

            average_success_persecond = period_success_connection / float(
                interval_seconds)
            average_bad_persecond = period_bad_connection / float(
                interval_seconds)

            if False:  # block mode (pause the whole program) -- currently disabled
                if getter.detect_mode:
                    if getter.detect_success_count > 3:
                        getter.leave_detect_mode()
                        self.detect_exit_wait = 1  # right after leaving, do not re-enter block mode for the next rounds
                else:
                    if total_connections * 0.9 < period_bad_connection:
                        if self.detect_exit_wait > 0:
                            print("---- waiting %s rounds ----" % self.detect_exit_wait)
                            self.detect_exit_wait -= 1
                        else:
                            getter.enter_detect_mode()

            try:
                # print report
                if not getter.detect_mode:
                    str_report = None
                    if not self.pause:
                        self.num_report += 1
                        str_report = self.num_report
                    else:
                        str_report = "paused"

                    #--------------------------------------------------------------------------------
                    # print interval string.
                    report_strs = []
                    report_strs.append("-" * 100)
                    report_strs.append("\n")
                    report_strs.append(
                        "$&mgr:%s(%s):> " %
                        (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                         str_report))
                    report_strs.append(
                        "Person(%sT on %s), " %
                        (num_persont_alive, self.store.person_queue.qsize()))
                    report_strs.append(
                        "Pub(%sT on %s), " %
                        (num_pubt_alive, len(self.store.pubmap)))
                    report_strs.append("DBCache({{{ %s }}}), " %
                                       len(self.store.pub_db_cache))
                    report_strs.append(
                        "T(busy/idle)(%s/%s), " %
                        (self.busy_semaphore, self.max_person_thread +
                         self.max_pub_thread - self.busy_semaphore))
                    report_strs.append("\n")
                    g = getter.success_connection_count
                    b = getter.bad_connection_count
                    t = g + b
                    rate = 0
                    if (t > 0):
                        rate = g / float(t)
                    report_strs.append("network(g+b=t)=(%s+%s=%s),rate=%.2f " %
                                       (g, b, t, rate))
                    report_strs.append(
                        "interval-network(g+b=t)=(%s+%s=%s), " %
                        (period_success_connection, period_bad_connection,
                         total_connections))
                    report_strs.append(
                        "avg:(g%.1f b%.1f in %s seconds.), " %
                        (average_success_persecond, average_bad_persecond,
                         interval_seconds))
                    report_strs.append("\n")
                    report_strs.append(
                        "time:(wait=%.2f, getlock=%.2f, get=%.2f)" %
                        (self.store.ppt_wait, self.store.ppt_getlock,
                         self.store.ppt_get))
                    if message is not None:
                        report_strs.append("\n")
                        report_strs.append(message)
                    report_strs.append("\n")
                    report_strs.append(
                        " * Process NA Persons : %s.\n" %
                        self.reportPersonProgress(self.generation))
                    report_strs.append(
                        " * Process Publication: %s.\n" %
                        self.reportPublicationProgress(self.generation))
                    report_strs.append("-" * 100)
                    report_strs.append("\n")

                    print("".join(report_strs))
                    #--------------------------------------------------------------------------------
            except Exception as e:
                print("ERROR: report error")
                print(e)

            try:
                # flush db cache
                self.store.flushDBCache()  # last flush cache to db.
                self.store.running = self.running  # pass main running thread to Store object.
            except Exception as e:
                print("ERROR: flush db cache")
                print(e)

            time.sleep(self.mgr_interval)  # interval

        print("$mgr:> exit.")

    def _checkFinishCondition(self):
        '''Return True when the whole run can stop, otherwise False.

        All of the following must hold, checked in this order: the provider
        has reported completion and the run is not paused; no thread is
        busy; the person queue, pubmap and pub_db_cache of the store are
        empty; and pubDao reports nothing left for this generation.
        '''
        # Provider must have reported finish, and we must not be paused.
        if not self.waiting_to_finish or self.pause:
            return False

        # All threads' status must be idle.
        if self.busy_semaphore != 0:
            return False

        # Task queues must be empty.
        store = self.store
        if not store.person_queue.empty():
            return False
        if len(store.pubmap) != 0:
            return False
        if len(store.pub_db_cache) != 0:
            return False

        # Really finished only when the database agrees.
        remaining = self.pubDao.getLeftCount(self.generation)
        if remaining == 0:
            return True
        return False

    def _maintainThreadPool(self, reload_all_thread):
        '''Keep both worker pools at their configured size; return the alive counts.

        reload_all_thread -- when true, every publication worker currently
            in the pool gets ask_to_stop = True and the publication pool is
            rebuilt from scratch (the person pool is not reloaded, as in
            the original code).

        Returns (num_persont_alive, num_pubt_alive).
        '''
        if reload_all_thread:  # kill all pub threads first.
            # Signal every pooled worker, not just the first max_pub_thread,
            # so none is dropped from the pool without being told to stop.
            for t in self.pub_thread_pool:
                if t is not None:
                    t.ask_to_stop = True
            self.pub_thread_pool = []

        num_persont_alive = self._syncWorkerPool(
            self.person_thread_pool, self.max_person_thread,
            lambda: PersonProcessThread(self), 'person-thread-')
        num_pubt_alive = self._syncWorkerPool(
            self.pub_thread_pool, self.max_pub_thread,
            lambda: PubProcessThread(self), 'pub-thread-')
        return (num_persont_alive, num_pubt_alive)

    def _syncWorkerPool(self, pool, pool_size, make_thread, name_prefix):
        '''Grow or shrink one pool to pool_size and return the alive count.

        pool        -- list of worker threads (None marks an empty slot); it
                       is modified in place.
        pool_size   -- number of workers wanted.
        make_thread -- zero-argument callable returning a new, unstarted worker.
        name_prefix -- worker name prefix; the slot index is appended.

        Empty or dead slots are (re)filled only while self.running is set.
        '''
        # if len less than max size, increase with None.
        while len(pool) < pool_size:
            pool.append(None)

        # check and start all unstarted threads.
        alive = 0
        for idx in range(pool_size):
            t = pool[idx]
            if t is not None and t.is_alive():
                alive += 1
            elif self.running:
                t = make_thread()
                t.name = name_prefix + str(idx)
                pool[idx] = t
                t.start()
                alive += 1

        # kill threads if needed: only the surplus beyond pool_size. The
        # original popped at index pool_size - 1, which stopped a wanted
        # worker and left a surplus one in the pool.
        while len(pool) > pool_size:
            t = pool.pop()
            if t is not None:
                t.stop()
                print("$mgr/thread:> kill thread %s" % t.name)

        return alive