from tool.MailReport import MailReporter

# Manual smoke test: send one short report through MailReporter, then
# print a marker so the operator can see that the call returned.
if __name__ == "__main__":
    MailReporter().report("hi")
    print("done")
def mgrThreadBody(self):
    """Management thread body: supervise the workers and report status.

    Runs until ``self.running`` is false and ``self.stopped`` is true.
    Each cycle it:

    * counts the pool threads whose ``check_idle()`` is truthy into
      ``self.PersonThreadActive`` / ``self.PubThreadActive``;
    * flushes ``self.pdfcache`` when ``settings.save_pdflink`` is set;
    * calls ``self._maintainThreadPool`` to get the alive worker counts;
    * clears ``self.running`` when ``self._checkFinishCondition()`` is
      true, and sets ``self.stopped`` once both alive counts are zero;
    * prints a status report (unless the retriever is in detect mode)
      and hands it to ``MailReporter`` when ``num_report`` is a
      multiple of 100;
    * flushes the store's DB cache and sleeps ``self.mgr_interval``.

    Every step is best-effort: a failure is printed together with the
    caught exception and the loop carries on.
    """
    print("#init:> start mgr & provider.")
    getter = HtmlRetriever.getInstance(self.settings.use_proxy)

    # Bound before the loop so a failing first _maintainThreadPool() call
    # cannot leave them undefined (that used to raise NameError below).
    # None never equals 0, so an unknown count does not stop the manager.
    num_persont_alive = None
    num_pubt_alive = None

    while self.running or not self.stopped:
        # Whole seconds since the previous cycle. timedelta.seconds alone
        # drops the days component, so fold it back in; clamp to 1 to keep
        # the per-second averages below well defined.
        now = datetime.datetime.now()
        elapsed = now - self.last_report_time
        interval_seconds = elapsed.days * 86400 + elapsed.seconds
        if interval_seconds <= 0:
            interval_seconds = 1
        self.last_report_time = now

        try:
            # NOTE(review): the "Active" counters count threads for which
            # check_idle() is truthy -- confirm the intended polarity.
            self.PersonThreadActive = 0
            self.PubThreadActive = 0
            for person_thread in self.person_thread_pool:
                if person_thread.check_idle():
                    self.PersonThreadActive += 1
            for pub_thread in self.pub_thread_pool:
                if pub_thread.check_idle():
                    self.PubThreadActive += 1
        except Exception as e:
            print("ERROR:count errer")
            print(repr(e))

        try:
            # save pdf link
            if self.settings.save_pdflink:
                self.pdfcache.flush()
        except Exception as e:
            print("ERROR: pdf link")
            print(repr(e))

        message = None
        # When to restart all threads & processes.
        # NOTE(review): only the message is produced here; the call below
        # always passes reload_all_thread=False, so no restart is actually
        # requested. Confirm whether the restart was disabled on purpose
        # before wiring the condition through.
        if self.num_report % 1000 == 0:
            message = "Kill & Restart All Thread."

        try:
            # Maintain Threads and get worker threads status.
            (num_persont_alive, num_pubt_alive) = self._maintainThreadPool(
                reload_all_thread=False)
        except Exception as e:
            # The counts from the previous cycle (or None) stay in effect.
            print("ERROR: maintain threads and worker")
            print(repr(e))

        try:
            # Finish Condition.
            if self._checkFinishCondition():
                self.running = False  # -> tell all threads finish.
                message = "MESSAGE! Send terminal signal to all worker thread."
        except Exception as e:
            print("ERROR: condition check")
            print(repr(e))

        # if all worker threads stopped, mgrThread can stop.
        if num_persont_alive == 0 and num_pubt_alive == 0:
            self.stopped = True
            message = "Send terminal signal to mgr_thread."

        # check network and count
        period_success_connection = (getter.success_connection_count
                                     - getter.last_success_connection_count)
        period_bad_connection = (getter.bad_connection_count
                                 - getter.last_bad_connection_count)
        total_connections = period_success_connection + period_bad_connection
        getter.last_success_connection_count = getter.success_connection_count
        getter.last_bad_connection_count = getter.bad_connection_count
        average_success_persecond = (period_success_connection
                                     / float(interval_seconds))
        average_bad_persecond = period_bad_connection / float(interval_seconds)

        # Block mode (pausing the whole program) is switched off by this
        # constant condition; the body is kept for when it is re-enabled.
        if False:
            if getter.detect_mode:
                if getter.detect_success_count > 3:
                    getter.leave_detect_mode()
                    # Right after leaving, do not re-enter block mode for
                    # the next rounds.
                    self.detect_exit_wait = 1
            else:
                if total_connections * 0.9 < period_bad_connection:
                    if self.detect_exit_wait > 0:
                        print("---- waiting %s rounds ----"
                              % self.detect_exit_wait)
                        self.detect_exit_wait -= 1
                    else:
                        getter.enter_detect_mode()

        ################ print interval string ################
        try:
            # print report
            if not getter.detect_mode:
                if not self.pause:
                    self.num_report += 1
                    str_report = self.num_report
                else:
                    str_report = "paused"

                max_person = self.settings.max_person_thread
                max_pub = self.settings.max_pub_thread

                report_strs = []
                report_strs.append("-" * 100)
                report_strs.append("\n")
                report_strs.append("$&mgr:%s(%s):> " % (
                    datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    str_report))
                report_strs.append("Person(%sT on %s), " % (
                    num_persont_alive, self.store.person_queue.qsize()))
                report_strs.append("Pub(%sT on %s, %s items), " % (
                    num_pubt_alive,
                    len(self.store.pubmap),
                    len(self.store.person_pub_map)))
                report_strs.append("DBCache({{{ %s }}}), "
                                   % len(self.store.pub_db_cache))
                report_strs.append("T(busy/idle)(%s/%s), " % (
                    self.busy_semaphore,
                    max_person + max_pub - self.busy_semaphore))
                report_strs.append('\n')
                report_strs.append(
                    "Person(busy/idle)(%s/%s), Pub(busy/idle)(%s/%s)" % (
                        self.busy_person_semaphore,
                        max_person - self.busy_person_semaphore,
                        self.busy_pub_semaphore,
                        max_pub - self.busy_pub_semaphore))

                # Lifetime connection totals and success rate.
                g = getter.success_connection_count
                b = getter.bad_connection_count
                t = g + b
                rate = 0
                if t > 0:
                    rate = g / float(t)
                report_strs.append(
                    "network(g+b=t)=(%s+%s=%s),rate=%.2f " % (g, b, t, rate))
                report_strs.append(
                    "interval-network(g+b=t)=(%s+%s=%s), " % (
                        period_success_connection,
                        period_bad_connection,
                        total_connections))
                report_strs.append(
                    "avg:(g%.1f b%.1f in %s seconds.), " % (
                        average_success_persecond,
                        average_bad_persecond,
                        interval_seconds))
                report_strs.append("\n")
                report_strs.append("now have %s child threads, "
                                   % self.threadChildren)
                report_strs.append(
                    "active threads (%s person, %s pub) , " % (
                        self.PersonThreadActive, self.PubThreadActive))
                report_strs.append("\n")
                report_strs.append(
                    "time:(wait=%.2f, getlock=%.2f, get=%.2f)" % (
                        self.store.ppt_wait,
                        self.store.ppt_getlock,
                        self.store.ppt_get))
                if message is not None:
                    report_strs.append("\n")
                    report_strs.append(message)
                report_strs.append("\n")
                report_strs.append(
                    " * Process NA Persons : %s.\n"
                    % self.reportPersonProgress(self.generation))
                report_strs.append(
                    " * Process Publication: %s.\n"
                    % self.reportPublicationProgress(self.generation))
                report_strs.append("-" * 100)
                report_strs.append("\n")
                print("".join(report_strs))

                if self.num_report % 100 == 0:
                    # NOTE(review): the list of fragments is passed, not
                    # the joined text -- confirm MailReporter.report
                    # accepts a list.
                    mr = MailReporter()
                    mr.report(report_strs)
        except Exception as e:
            print("ERROR: report error")
            print(repr(e))

        try:
            self.store.flushDBCache()  # last flush cache to db.
            # pass main running thread to Store object.
            self.store.running = self.running
        except Exception as e:
            print("ERROR: flush db cache")
            print(repr(e))

        time.sleep(self.mgr_interval)  # interval

    print("$mgr:> exit.")
def mgrThreadBody(self):
    """Management thread body: supervise the workers and report status.

    Runs until ``self.running`` is false and ``self.stopped`` is true.
    Each cycle it:

    * counts the pool threads whose ``check_idle()`` is truthy into
      ``self.PersonThreadActive`` / ``self.PubThreadActive``;
    * flushes ``self.pdfcache`` when ``settings.save_pdflink`` is set;
    * calls ``self._maintainThreadPool`` to get the alive worker counts;
    * clears ``self.running`` when ``self._checkFinishCondition()`` is
      true, and sets ``self.stopped`` once both alive counts are zero;
    * prints a status report (unless the retriever is in detect mode)
      and hands it to ``MailReporter`` when ``num_report`` is a
      multiple of 100;
    * flushes the store's DB cache and sleeps ``self.mgr_interval``.

    Every step is best-effort: a failure is printed together with the
    caught exception and the loop carries on.
    """
    print("#init:> start mgr & provider.")
    getter = HtmlRetriever.getInstance(self.settings.use_proxy)

    # Bound before the loop so a failing first _maintainThreadPool() call
    # cannot leave them undefined (that used to raise NameError below).
    # None never equals 0, so an unknown count does not stop the manager.
    num_persont_alive = None
    num_pubt_alive = None

    while self.running or not self.stopped:
        # Whole seconds since the previous cycle. timedelta.seconds alone
        # drops the days component, so fold it back in; clamp to 1 to keep
        # the per-second averages below well defined.
        now = datetime.datetime.now()
        elapsed = now - self.last_report_time
        interval_seconds = elapsed.days * 86400 + elapsed.seconds
        if interval_seconds <= 0:
            interval_seconds = 1
        self.last_report_time = now

        try:
            # NOTE(review): the "Active" counters count threads for which
            # check_idle() is truthy -- confirm the intended polarity.
            self.PersonThreadActive = 0
            self.PubThreadActive = 0
            for person_thread in self.person_thread_pool:
                if person_thread.check_idle():
                    self.PersonThreadActive += 1
            for pub_thread in self.pub_thread_pool:
                if pub_thread.check_idle():
                    self.PubThreadActive += 1
        except Exception as e:
            print("ERROR:count errer")
            print(repr(e))

        try:
            # save pdf link
            if self.settings.save_pdflink:
                self.pdfcache.flush()
        except Exception as e:
            print("ERROR: pdf link")
            print(repr(e))

        message = None
        # When to restart all threads & processes.
        # NOTE(review): only the message is produced here; the call below
        # always passes reload_all_thread=False, so no restart is actually
        # requested. Confirm whether the restart was disabled on purpose
        # before wiring the condition through.
        if self.num_report % 1000 == 0:
            message = "Kill & Restart All Thread."

        try:
            # Maintain Threads and get worker threads status.
            (num_persont_alive, num_pubt_alive) = self._maintainThreadPool(
                reload_all_thread=False)
        except Exception as e:
            # The counts from the previous cycle (or None) stay in effect.
            print("ERROR: maintain threads and worker")
            print(repr(e))

        try:
            # Finish Condition.
            if self._checkFinishCondition():
                self.running = False  # -> tell all threads finish.
                message = "MESSAGE! Send terminal signal to all worker thread."
        except Exception as e:
            print("ERROR: condition check")
            print(repr(e))

        # if all worker threads stopped, mgrThread can stop.
        if num_persont_alive == 0 and num_pubt_alive == 0:
            self.stopped = True
            message = "Send terminal signal to mgr_thread."

        # check network and count
        period_success_connection = (getter.success_connection_count
                                     - getter.last_success_connection_count)
        period_bad_connection = (getter.bad_connection_count
                                 - getter.last_bad_connection_count)
        total_connections = period_success_connection + period_bad_connection
        getter.last_success_connection_count = getter.success_connection_count
        getter.last_bad_connection_count = getter.bad_connection_count
        average_success_persecond = (period_success_connection
                                     / float(interval_seconds))
        average_bad_persecond = period_bad_connection / float(interval_seconds)

        # Block mode (pausing the whole program) is switched off by this
        # constant condition; the body is kept for when it is re-enabled.
        if False:
            if getter.detect_mode:
                if getter.detect_success_count > 3:
                    getter.leave_detect_mode()
                    # Right after leaving, do not re-enter block mode for
                    # the next rounds.
                    self.detect_exit_wait = 1
            else:
                if total_connections * 0.9 < period_bad_connection:
                    if self.detect_exit_wait > 0:
                        print("---- waiting %s rounds ----"
                              % self.detect_exit_wait)
                        self.detect_exit_wait -= 1
                    else:
                        getter.enter_detect_mode()

        ################ print interval string ################
        try:
            # print report
            if not getter.detect_mode:
                if not self.pause:
                    self.num_report += 1
                    str_report = self.num_report
                else:
                    str_report = "paused"

                max_person = self.settings.max_person_thread
                max_pub = self.settings.max_pub_thread

                report_strs = []
                report_strs.append("-" * 100)
                report_strs.append("\n")
                report_strs.append("$&mgr:%s(%s):> " % (
                    datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    str_report))
                report_strs.append("Person(%sT on %s), " % (
                    num_persont_alive, self.store.person_queue.qsize()))
                report_strs.append("Pub(%sT on %s, %s items), " % (
                    num_pubt_alive,
                    len(self.store.pubmap),
                    len(self.store.person_pub_map)))
                report_strs.append("DBCache({{{ %s }}}), "
                                   % len(self.store.pub_db_cache))
                report_strs.append("T(busy/idle)(%s/%s), " % (
                    self.busy_semaphore,
                    max_person + max_pub - self.busy_semaphore))
                report_strs.append('\n')
                report_strs.append(
                    "Person(busy/idle)(%s/%s), Pub(busy/idle)(%s/%s)" % (
                        self.busy_person_semaphore,
                        max_person - self.busy_person_semaphore,
                        self.busy_pub_semaphore,
                        max_pub - self.busy_pub_semaphore))

                # Lifetime connection totals and success rate.
                g = getter.success_connection_count
                b = getter.bad_connection_count
                t = g + b
                rate = 0
                if t > 0:
                    rate = g / float(t)
                report_strs.append(
                    "network(g+b=t)=(%s+%s=%s),rate=%.2f " % (g, b, t, rate))
                report_strs.append(
                    "interval-network(g+b=t)=(%s+%s=%s), " % (
                        period_success_connection,
                        period_bad_connection,
                        total_connections))
                report_strs.append(
                    "avg:(g%.1f b%.1f in %s seconds.), " % (
                        average_success_persecond,
                        average_bad_persecond,
                        interval_seconds))
                report_strs.append("\n")
                report_strs.append("now have %s child threads, "
                                   % self.threadChildren)
                report_strs.append(
                    "active threads (%s person, %s pub) , " % (
                        self.PersonThreadActive, self.PubThreadActive))
                report_strs.append("\n")
                report_strs.append(
                    "time:(wait=%.2f, getlock=%.2f, get=%.2f)" % (
                        self.store.ppt_wait,
                        self.store.ppt_getlock,
                        self.store.ppt_get))
                if message is not None:
                    report_strs.append("\n")
                    report_strs.append(message)
                report_strs.append("\n")
                report_strs.append(
                    " * Process NA Persons : %s.\n"
                    % self.reportPersonProgress(self.generation))
                report_strs.append(
                    " * Process Publication: %s.\n"
                    % self.reportPublicationProgress(self.generation))
                report_strs.append("-" * 100)
                report_strs.append("\n")
                print("".join(report_strs))

                if self.num_report % 100 == 0:
                    # NOTE(review): the list of fragments is passed, not
                    # the joined text -- confirm MailReporter.report
                    # accepts a list.
                    mr = MailReporter()
                    mr.report(report_strs)
        except Exception as e:
            print("ERROR: report error")
            print(repr(e))

        try:
            self.store.flushDBCache()  # last flush cache to db.
            # pass main running thread to Store object.
            self.store.running = self.running
        except Exception as e:
            print("ERROR: flush db cache")
            print(repr(e))

        time.sleep(self.mgr_interval)  # interval

    print("$mgr:> exit.")