Exemple #1
0
from tool.MailReport import MailReporter

if __name__ == "__main__":
	mr = MailReporter()
	mr.report("hi")
	print "done"
Exemple #2
0
	def mgrThreadBody(self):
		"Management Thread"
		print "#init:> start mgr & provider."
		getter = HtmlRetriever.getInstance(self.settings.use_proxy)

		while self.running or not self.stopped:
			# interval seconds passed.
			interval_seconds = (datetime.datetime.now() - self.last_report_time).seconds
			if interval_seconds == 0: interval_seconds = 1
			self.last_report_time = datetime.datetime.now();

			try:
				self.PersonThreadActive = 0
				self.PubThreadActive = 0
				for x in self.person_thread_pool:
					if x.check_idle():
						self.PersonThreadActive += 1	
				for y in self.pub_thread_pool:
					if y.check_idle():
						self.PubThreadActive += 1
			except Exception:
				print "ERROR:count errer"
				print Exception

			try:
				# save pdf link
				if self.settings.save_pdflink:
					self.pdfcache.flush()
			except Exception:
				print "ERROR: pdf link"
				print Exception

			message = None

			# 什么时候重启所有线程&进程
			reload_all_thread = False
			if self.num_report % 1000 == 0:
				reload_all_thread = True
				message = "Kill & Restart All Thread."
			
			try:
				# Maintain Threads and get worker threads status.
				(num_persont_alive, num_pubt_alive) = self._maintainThreadPool(reload_all_thread=False)
			except Exception:
				print "ERROR: maintain threads and worker"
				print Exception

			try:
				# Finish Condition.
				if self._checkFinishCondition():
					self.running = False					# -> tell all threads finish.
					message = "MESSAGE! Send terminal signal to all worker thread."
			except Exception:
				print "ERROR: condition check"
				print Exception
				
			# if all worker threads stopped, mgrThread can stop.
			if num_persont_alive == 0 and num_pubt_alive == 0:
				self.stopped = True
				message = "Send terminal signal to mgr_thread."

			# check network and count 
			period_success_connection = getter.success_connection_count - getter.last_success_connection_count
			period_bad_connection = getter.bad_connection_count - getter.last_bad_connection_count
			total_connections = period_success_connection + period_bad_connection
			getter.last_success_connection_count = getter.success_connection_count
			getter.last_bad_connection_count = getter.bad_connection_count

			average_success_persecond = period_success_connection / float(interval_seconds)
			average_bad_persecond = period_bad_connection / float(interval_seconds)

			if False: # 是否Block模式,就是暂停整个程序
				if getter.detect_mode:
					if getter.detect_success_count > 3:
						getter.leave_detect_mode()
						self.detect_exit_wait = 1 # 刚出来时,下两轮都不要再进入block模式了。
				else:
					if total_connections * 0.9 < period_bad_connection:
						if self.detect_exit_wait > 0:
							print "---- waiting %s rounds ----" % self.detect_exit_wait
							self.detect_exit_wait -= 1
						else:
							getter.enter_detect_mode()

			################ print interval string ################
			try:
				# print report
				if not getter.detect_mode:
					str_report = None
					if not self.pause:
						self.num_report += 1
						str_report = self.num_report
					else:
						str_report = "paused"
					
					report_strs = []
					report_strs.append("-" * 100)
					report_strs.append("\n")
					report_strs.append("$&mgr:%s(%s):> " % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), str_report))
					report_strs.append("Person(%sT on %s), " % (num_persont_alive, self.store.person_queue.qsize()))
					report_strs.append("Pub(%sT on %s, %s items), " % (num_pubt_alive, len(self.store.pubmap), len(self.store.person_pub_map)))
					report_strs.append("DBCache({{{ %s }}}), " % len(self.store.pub_db_cache))
					report_strs.append("T(busy/idle)(%s/%s), " % (self.busy_semaphore, self.settings.max_person_thread + self.settings.max_pub_thread - self.busy_semaphore))
					report_strs += '\n'
					report_strs.append("Person(busy/idle)(%s/%s), Pub(busy/idle)(%s/%s)" % (self.busy_person_semaphore, self.settings.max_person_thread-self.busy_person_semaphore, self.busy_pub_semaphore, self.settings.max_pub_thread-self.busy_pub_semaphore))
					g = getter.success_connection_count
					b = getter.bad_connection_count
					t = g + b
					rate = 0
					if(t > 0):
						rate = g / float(t)
					report_strs.append("network(g+b=t)=(%s+%s=%s),rate=%.2f " % (g, b , t, rate))
					report_strs.append("interval-network(g+b=t)=(%s+%s=%s), " % (period_success_connection, period_bad_connection, total_connections))
					report_strs.append("avg:(g%.1f b%.1f in %s seconds.), " % (average_success_persecond, average_bad_persecond, interval_seconds))
					report_strs.append("\n")
					report_strs.append("now have %s child threads, " % self.threadChildren)
					report_strs.append("active threads (%s person, %s pub) , " % (self.PersonThreadActive, self.PubThreadActive))
					report_strs.append("\n")
					report_strs.append("time:(wait=%.2f, getlock=%.2f, get=%.2f)" % (self.store.ppt_wait, self.store.ppt_getlock, self.store.ppt_get))
					if message is not None:
						report_strs.append("\n")
						report_strs.append(message)
					report_strs.append("\n")
						
					report_strs.append(" * Process NA Persons : %s.\n" % self.reportPersonProgress(self.generation))
					report_strs.append(" * Process Publication: %s.\n" % self.reportPublicationProgress(self.generation))
					report_strs.append("-" * 100)
					report_strs.append("\n")

					print "".join(report_strs)
					if (self.num_report%100 == 0):
						mr = MailReporter()
						mr.report(report_strs)
			except Exception:
				print "ERROR: report error"
				print Exception

			try:
				self.store.flushDBCache()				# last flush cache to db.
				self.store.running = self.running		# pass main running thread to Store object.
			except Exception:
				print "ERROR: flush db cache"
				print Exception

			time.sleep(self.mgr_interval) 			# interval

		print "$mgr:> exit."
Exemple #3
0
    def mgrThreadBody(self):
        "Management Thread"
        print "#init:> start mgr & provider."
        getter = HtmlRetriever.getInstance(self.settings.use_proxy)

        while self.running or not self.stopped:
            # interval seconds passed.
            interval_seconds = (datetime.datetime.now() -
                                self.last_report_time).seconds
            if interval_seconds == 0: interval_seconds = 1
            self.last_report_time = datetime.datetime.now()

            try:
                self.PersonThreadActive = 0
                self.PubThreadActive = 0
                for x in self.person_thread_pool:
                    if x.check_idle():
                        self.PersonThreadActive += 1
                for y in self.pub_thread_pool:
                    if y.check_idle():
                        self.PubThreadActive += 1
            except Exception:
                print "ERROR:count errer"
                print Exception

            try:
                # save pdf link
                if self.settings.save_pdflink:
                    self.pdfcache.flush()
            except Exception:
                print "ERROR: pdf link"
                print Exception

            message = None

            # 什么时候重启所有线程&进程
            reload_all_thread = False
            if self.num_report % 1000 == 0:
                reload_all_thread = True
                message = "Kill & Restart All Thread."

            try:
                # Maintain Threads and get worker threads status.
                (num_persont_alive, num_pubt_alive) = self._maintainThreadPool(
                    reload_all_thread=False)
            except Exception:
                print "ERROR: maintain threads and worker"
                print Exception

            try:
                # Finish Condition.
                if self._checkFinishCondition():
                    self.running = False  # -> tell all threads finish.
                    message = "MESSAGE! Send terminal signal to all worker thread."
            except Exception:
                print "ERROR: condition check"
                print Exception

            # if all worker threads stopped, mgrThread can stop.
            if num_persont_alive == 0 and num_pubt_alive == 0:
                self.stopped = True
                message = "Send terminal signal to mgr_thread."

            # check network and count
            period_success_connection = getter.success_connection_count - getter.last_success_connection_count
            period_bad_connection = getter.bad_connection_count - getter.last_bad_connection_count
            total_connections = period_success_connection + period_bad_connection
            getter.last_success_connection_count = getter.success_connection_count
            getter.last_bad_connection_count = getter.bad_connection_count

            average_success_persecond = period_success_connection / float(
                interval_seconds)
            average_bad_persecond = period_bad_connection / float(
                interval_seconds)

            if False:  # 是否Block模式,就是暂停整个程序
                if getter.detect_mode:
                    if getter.detect_success_count > 3:
                        getter.leave_detect_mode()
                        self.detect_exit_wait = 1  # 刚出来时,下两轮都不要再进入block模式了。
                else:
                    if total_connections * 0.9 < period_bad_connection:
                        if self.detect_exit_wait > 0:
                            print "---- waiting %s rounds ----" % self.detect_exit_wait
                            self.detect_exit_wait -= 1
                        else:
                            getter.enter_detect_mode()

            ################ print interval string ################
            try:
                # print report
                if not getter.detect_mode:
                    str_report = None
                    if not self.pause:
                        self.num_report += 1
                        str_report = self.num_report
                    else:
                        str_report = "paused"

                    report_strs = []
                    report_strs.append("-" * 100)
                    report_strs.append("\n")
                    report_strs.append(
                        "$&mgr:%s(%s):> " %
                        (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                         str_report))
                    report_strs.append(
                        "Person(%sT on %s), " %
                        (num_persont_alive, self.store.person_queue.qsize()))
                    report_strs.append("Pub(%sT on %s, %s items), " %
                                       (num_pubt_alive, len(self.store.pubmap),
                                        len(self.store.person_pub_map)))
                    report_strs.append("DBCache({{{ %s }}}), " %
                                       len(self.store.pub_db_cache))
                    report_strs.append(
                        "T(busy/idle)(%s/%s), " %
                        (self.busy_semaphore, self.settings.max_person_thread +
                         self.settings.max_pub_thread - self.busy_semaphore))
                    report_strs += '\n'
                    report_strs.append(
                        "Person(busy/idle)(%s/%s), Pub(busy/idle)(%s/%s)" %
                        (self.busy_person_semaphore,
                         self.settings.max_person_thread -
                         self.busy_person_semaphore, self.busy_pub_semaphore,
                         self.settings.max_pub_thread -
                         self.busy_pub_semaphore))
                    g = getter.success_connection_count
                    b = getter.bad_connection_count
                    t = g + b
                    rate = 0
                    if (t > 0):
                        rate = g / float(t)
                    report_strs.append("network(g+b=t)=(%s+%s=%s),rate=%.2f " %
                                       (g, b, t, rate))
                    report_strs.append(
                        "interval-network(g+b=t)=(%s+%s=%s), " %
                        (period_success_connection, period_bad_connection,
                         total_connections))
                    report_strs.append(
                        "avg:(g%.1f b%.1f in %s seconds.), " %
                        (average_success_persecond, average_bad_persecond,
                         interval_seconds))
                    report_strs.append("\n")
                    report_strs.append("now have %s child threads, " %
                                       self.threadChildren)
                    report_strs.append(
                        "active threads (%s person, %s pub) , " %
                        (self.PersonThreadActive, self.PubThreadActive))
                    report_strs.append("\n")
                    report_strs.append(
                        "time:(wait=%.2f, getlock=%.2f, get=%.2f)" %
                        (self.store.ppt_wait, self.store.ppt_getlock,
                         self.store.ppt_get))
                    if message is not None:
                        report_strs.append("\n")
                        report_strs.append(message)
                    report_strs.append("\n")

                    report_strs.append(
                        " * Process NA Persons : %s.\n" %
                        self.reportPersonProgress(self.generation))
                    report_strs.append(
                        " * Process Publication: %s.\n" %
                        self.reportPublicationProgress(self.generation))
                    report_strs.append("-" * 100)
                    report_strs.append("\n")

                    print "".join(report_strs)
                    if (self.num_report % 100 == 0):
                        mr = MailReporter()
                        mr.report(report_strs)
            except Exception:
                print "ERROR: report error"
                print Exception

            try:
                self.store.flushDBCache()  # last flush cache to db.
                self.store.running = self.running  # pass main running thread to Store object.
            except Exception:
                print "ERROR: flush db cache"
                print Exception

            time.sleep(self.mgr_interval)  # interval

        print "$mgr:> exit."