def _grab(self):
    """Expand every pending compressed log and feed its lines to the handler.

    For each path yielded by ``self._grab_files()`` the archive is copied
    into ``self.work_dir`` and expanded there with ``bunzip2``, unless the
    expanded file already exists.  Every line of the expanded file is then
    passed to ``self._handle_record`` and ``self.current`` is incremented
    once per line.
    """
    for log_gz_file in self._grab_files():
        echo("processing %s" % log_gz_file)
        input_file_base_name = os.path.basename(log_gz_file)
        # The expanded file name is the archive name minus its last four
        # characters (presumably the ".bz2" suffix -- confirm with caller).
        new_input_file = os.path.join(self.work_dir,
                                      input_file_base_name[:-4])
        if not os.path.exists(new_input_file):
            # NOTE(review): the paths are interpolated into the shell
            # command unquoted, so names containing spaces or shell
            # metacharacters will not work.
            os.system('cp %s %s' % (log_gz_file, self.work_dir))
            os.system('cd %s;bunzip2 %s'
                      % (self.work_dir, input_file_base_name))
        # The context manager closes the file even if a record handler
        # raises part-way through.
        with open(new_input_file, "r") as fd:
            for record in fd:
                self._handle_record(record)
                self.current += 1
def _process_msg(self, msg):
    """Dispatch one incoming message according to its message type.

    * ``MsgCalcStart``: store the queue names carried by the message in
      ``self.calc_names`` and start the workers.
    * ``MsgWorkerQuit``: flag the sending worker as stopped and set
      ``self.finish`` once no worker is flagged as running any more.
    * ``MsgProgressReport``: forward to ``self._handle_report_msg``.

    Messages of any other type are ignored.
    """
    kind = msg.get_msgtype()

    if kind == MsgType.MsgCalcStart:
        echo("GrabberManager::calculation workers are ready")
        start_msg = MsgCalcStart()
        start_msg.cast(msg)
        start_msg.parse()
        self.calc_names = start_msg.getQueues()
        worker_count = len(self.calc_names)
        echo("GrabberManager::total %s calculation workers" % worker_count)
        self._start_workers()
        return

    if kind == MsgType.MsgWorkerQuit:
        sender = msg.get_header().get_sender()
        echo("GrabberManager::Notice worker \"%s\" quit" % sender)
        if sender in self.workers_data:
            self.workers_data[sender]['status'] = False
        # A worker counts as running only while its status is exactly True.
        still_running = any(info['status'] is True
                            for info in self.workers_data.values())
        if not still_running:
            echo("GrabberManager::All grabber workers quit")
            echo("GrabberManager::i quit")
            self.finish = True
        return

    if kind == MsgType.MsgProgressReport:
        self._handle_report_msg(msg)
def _process_msg(self, msg):
    """Handle a single message received by the grabber manager.

    ``MsgCalcStart`` records the calculation queue names and starts the
    workers; ``MsgWorkerQuit`` marks the sender as stopped and raises
    ``self.finish`` when no worker remains flagged as running;
    ``MsgProgressReport`` is handed to ``self._handle_report_msg``.
    Other message types are silently dropped.
    """
    received_type = msg.get_msgtype()

    if received_type == MsgType.MsgCalcStart:
        echo("GrabberManager::calculation workers are ready")
        calc_start = MsgCalcStart()
        calc_start.cast(msg)
        calc_start.parse()
        self.calc_names = calc_start.getQueues()
        echo("GrabberManager::total %s calculation workers"
             % len(self.calc_names))
        self._start_workers()
        return

    if received_type == MsgType.MsgWorkerQuit:
        quitter = msg.get_header().get_sender()
        echo("GrabberManager::Notice worker \"%s\" quit" % quitter)
        if quitter in self.workers_data:
            self.workers_data[quitter]['status'] = False
        # Count a worker as alive only when its status is the True object.
        someone_alive = any(entry['status'] is True
                            for entry in self.workers_data.values())
        if not someone_alive:
            echo("GrabberManager::All grabber workers quit")
            echo("GrabberManager::i quit")
            self.finish = True
        return

    if received_type == MsgType.MsgProgressReport:
        self._handle_report_msg(msg)
def _top_medias(self):
    """Insert the ranked top-media list into the ``play_top_medias`` table.

    The ranking comes from ``self.stat_user.gen_stat_tops()``.  One row is
    inserted per ``(mediaid, count)`` pair, numbered from 0 in ranking
    order, with the title fetched through ``VodMedia``.  The session is
    stored on ``self.dbsession``, committed once after all inserts and
    then closed.
    """
    ranking = self.stat_user.gen_stat_tops()
    echo("there are totaly %s top medias" % len(ranking))
    self.dbsession = self.db.open('play_top_medias')
    period = '%s_%s' % (self.config['start'], self.config['end'])
    # A single dict is reused for every insert; only the per-media fields
    # are overwritten on each pass.
    row = {"seq": "", 'date': period, 'mediaid': "", 'media_name': '',
           'count': 0}
    from common.fetch_media_data import VodMedia
    for seq, (mediaid, count) in enumerate(ranking):
        row['seq'] = seq
        row['mediaid'] = mediaid
        media = VodMedia(mediaid)
        title = media.get_title()
        print("id: %s, cid: %s, title: %s"
              % (mediaid, media.get_category_id(), title))
        row['media_name'] = title
        row['count'] = count
        self.dbsession.insert(row)
    self.dbsession.commit()
    self.dbsession.close()
def _report_progress(self):
    """Echo one progress line for every entry in ``self.workers_data``.

    Each line shows the worker name, its ``current``/``total``/``left``/
    ``rate`` values and ``exp_time`` rendered as a ``timedelta``.
    """
    template = ("{0} --> cur: {1}, total: {2}, left: {3}, rate: {4}, "
                "exp: {5}")
    for worker, stats in self.workers_data.items():
        line = template.format(
            worker,
            stats['current'],
            stats['total'],
            stats['left'],
            stats['rate'],
            datetime.timedelta(seconds=int(stats['exp_time'])))
        echo(line)
def run(self):
    """Run ``self._process()`` followed by ``self._final()``.

    Any ``Exception`` raised by either step is caught here: its ``repr``
    is printed, the traceback is printed via ``traceback.print_exc()``,
    and the exception is not re-raised.
    """
    echo("process [%s/%s] start ..." % (self.pid, self.msgh_name))
    try:
        self._process()
        self._final()
    except Exception as e:
        # ``as`` form works on Python 2.6+ and 3; the comma form is a
        # syntax error on Python 3.
        print(repr(e))
        import traceback
        traceback.print_exc()
def run(self):
    """Execute the processing step and then the finalisation step.

    Calls ``self._process()`` and ``self._final()`` in that order.  An
    ``Exception`` from either one is reported (its ``repr`` is printed and
    the traceback is emitted with ``traceback.print_exc()``) and then
    swallowed, so this method itself does not propagate it.
    """
    echo("process [%s/%s] start ..." % (self.pid, self.msgh_name))
    try:
        self._process()
        self._final()
    except Exception as e:
        # The ``as`` spelling is accepted by Python 2.6+ and Python 3
        # alike, unlike the legacy ``except Exception, e`` form.
        print(repr(e))
        import traceback
        traceback.print_exc()
def _notice_calc_finish(self):
    """Send a ``MsgGrabberQuit`` message to the "CalcManager" queue.

    The queue is resolved through ``self.msgh``.  When it cannot be found
    a warning is issued and nothing is sent.
    """
    echo("GrabberManager::notice to calculation processes "
         "the work has been finished")
    gq_msg = MsgGrabberQuit()
    qid = self.msgh.findQueue("CalcManager")
    queue = self.msgh.getQueue(qid)
    if queue is None:
        # Keep the trailing space on the first fragment: adjacent string
        # literals are joined without any separator.
        warn("GrabberManager::_notice_calc_finish() cannot find queue "
             "with name CalcManager")
        return
    self.queue.send(queue, gq_msg)
def _print_report(self):
    """Insert every ``(timestamp, count)`` pair of ``self.data`` into the DB.

    All rows go through ``self.dbsession`` and are committed once at the
    end.  While iterating, the largest count seen (starting from 0) and
    its timestamp are tracked and echoed in the closing message.
    """
    print("going to generate report file")
    row = {'timestamp': "", 'count': ''}
    peak = 0
    peak_ts = ''
    for stamp, hits in self.data.items():
        row['timestamp'] = stamp
        row['count'] = hits
        # Strict comparison: on ties the first timestamp seen is kept.
        if hits > peak:
            peak, peak_ts = hits, stamp
        self.dbsession.insert(row)
    self.dbsession.commit()
    summary = ("report generation completed, max value is %d, "
               "related ts is %s")
    echo(summary % (peak, peak_ts))
def _report_progress(self):
    """Echo one progress line per worker, with percentage and quit marker.

    The percentage is ``current / total * 100`` truncated to an int, or
    ``0.0`` when ``total`` converts to the integer 0.  Workers whose
    ``status`` is ``False`` get a ``[QUIT]`` prefix.
    """
    template = ("{7}{0} --> cur: {1}, total: {2}, left: {3}({6}%), "
                "rate: {4}, exp: {5}")
    for worker, stats in self.workers_data.items():
        if int(stats['total']) == 0:
            # Avoid dividing by zero when nothing is scheduled yet.
            percent = 0.0
        else:
            ratio = float(stats['current']) / float(stats['total'])
            percent = int(ratio * 100)
        marker = '[QUIT]' if stats['status'] is False else ''
        line = template.format(
            worker,
            stats['current'],
            stats['total'],
            stats['left'],
            stats['rate'],
            datetime.timedelta(seconds=int(stats['exp_time'])),
            percent,
            marker)
        echo(line)
def _report_progress(self):
    """Report the progress of every worker in ``self.workers_data``.

    For each worker one line is echoed containing its counters, the
    completed percentage (``0.0`` when ``total`` is zero, otherwise the
    truncated integer percentage) and a ``[QUIT]`` prefix when the
    worker's ``status`` is ``False``.
    """
    layout = ("{7}{0} --> cur: {1}, total: {2}, left: {3}({6}%), "
              "rate: {4}, exp: {5}")
    for proc_name, info in self.workers_data.items():
        # Guard against a zero total before dividing.
        if int(info['total']) == 0:
            done_pct = 0.0
        else:
            done_pct = int(
                float(info['current']) / float(info['total']) * 100)
        prefix = '[QUIT]' if info['status'] is False else ''
        echo(layout.format(
            proc_name,
            info['current'],
            info['total'],
            info['left'],
            info['rate'],
            datetime.timedelta(seconds=int(info['exp_time'])),
            done_pct,
            prefix))
def _grab(self):
    """Expand every pending compressed log and feed its lines to the handler.

    For each path yielded by ``self._grab_files()`` the archive is copied
    into ``self.work_dir`` and expanded there with ``bunzip2``, unless the
    expanded file already exists.  Every line is passed to
    ``self._handle_record`` together with ``self.nginx_entry``;
    ``self.current`` is incremented and ``self.nginx_entry`` reset after
    each line.
    """
    for log_gz_file in self._grab_files():
        echo("processing %s" % log_gz_file)
        input_file_base_name = os.path.basename(log_gz_file)
        # The expanded file name is the archive name minus its last four
        # characters (presumably the ".bz2" suffix -- confirm with caller).
        new_input_file = os.path.join(self.work_dir,
                                      input_file_base_name[:-4])
        if not os.path.exists(new_input_file):
            # NOTE(review): the paths are interpolated into the shell
            # command unquoted, so names containing spaces or shell
            # metacharacters will not work.
            os.system('cp %s %s' % (log_gz_file, self.work_dir))
            os.system('cd %s;bunzip2 %s'
                      % (self.work_dir, input_file_base_name))
        # The context manager closes the file even if a record handler
        # raises part-way through.
        with open(new_input_file, "r") as fd:
            for record in fd:
                self._handle_record(self.nginx_entry, record)
                self.current += 1
                self.nginx_entry.reset()
def _grab(self):
    """Unpack each pending log archive and process it line by line.

    Paths come from ``self._grab_files()``.  An archive is copied into
    ``self.work_dir`` and expanded with ``bunzip2`` only when the expanded
    file is not already present.  Each line is handed to
    ``self._handle_record`` along with ``self.nginx_entry``; afterwards
    ``self.current`` is bumped and ``self.nginx_entry`` is reset.
    """
    for log_gz_file in self._grab_files():
        echo("processing %s" % log_gz_file)
        input_file_base_name = os.path.basename(log_gz_file)
        # Stripping four characters yields the expanded file's name
        # (presumably removes ".bz2" -- confirm against _grab_files()).
        new_input_file = os.path.join(self.work_dir,
                                      input_file_base_name[:-4])
        if not os.path.exists(new_input_file):
            # NOTE(review): both commands are built by plain string
            # interpolation; paths with spaces or shell metacharacters
            # are not supported.
            os.system('cp %s %s' % (log_gz_file, self.work_dir))
            os.system('cd %s;bunzip2 %s'
                      % (self.work_dir, input_file_base_name))
        # ``with`` guarantees the handle is released when a handler raises.
        with open(new_input_file, "r") as fd:
            for record in fd:
                self._handle_record(self.nginx_entry, record)
                self.current += 1
                self.nginx_entry.reset()
def _print_report(self):
    """Write the per-timestamp counts and the top counts to the report DB.

    First every ``(timestamp, count)`` pair of ``self.data`` is passed to
    ``self._mark_top`` and inserted into the ``freq_trends_*`` table.
    Then every pair of ``self.top_data`` is inserted into the
    ``freq_tops_*`` table.  The table suffix is ``dbname_dict[api]``; each
    table gets its own session, which is committed and closed.
    """
    echo("going to generate report file")
    echo("generating the count data for every seconds")
    store = Mydb()
    store.connect('report')
    trends_table = "freq_trends_" + dbname_dict[api]
    session = store.open(trends_table)
    # One dict is reused for every insert in both passes.
    row = {'timestamp': "", 'count': ''}
    for stamp, hits in self.data.items():
        row['timestamp'] = stamp
        row['count'] = hits
        self._mark_top(stamp, hits)
        session.insert(row)
    session.commit()
    session.close()

    echo("generating the top count data for earch day")
    store.connect('report')
    tops_table = "freq_tops_" + dbname_dict[api]
    session = store.open(tops_table)
    for stamp, hits in self.top_data.items():
        row['timestamp'] = stamp
        row['count'] = hits
        session.insert(row)
    session.commit()
    session.close()
def _grab(self):
    """Expand every pending gzip log and process the matching records.

    For each path yielded by ``self._grab_files()`` the archive is copied
    into ``self.work_dir`` and expanded there with ``gunzip``, unless the
    expanded file already exists.  Only lines whose characters 4..7 equal
    ``"5011"`` or ``"5042"`` are passed to ``self._handle_record``;
    ``self.current`` is incremented once per handled line.
    """
    wanted_codes = ("5011", "5042")
    for log_gz_file in self._grab_files():
        echo("processing %s" % log_gz_file)
        # Drop the last three characters (presumably ".gz") to get the
        # expanded file's name.
        input_file_base_name = os.path.basename(log_gz_file[:-3])
        new_input_file = os.path.join(self.work_dir, input_file_base_name)
        if not os.path.exists(new_input_file):
            # NOTE(review): the paths are interpolated into the shell
            # command unquoted, so names containing spaces or shell
            # metacharacters will not work.
            os.system('cp %s %s' % (log_gz_file, self.work_dir))
            unzip_cmd = 'cd %s;gunzip %s' % (self.work_dir,
                                             input_file_base_name)
            print(unzip_cmd)
            os.system(unzip_cmd)
        # The context manager closes the file even if a record handler
        # raises part-way through.
        with open(new_input_file, "r") as fd:
            for record in fd:
                if record[4:8] not in wanted_codes:
                    continue
                self._handle_record(record)
                self.current += 1
def _grab(self):
    """Expand every pending gzip log and process the matching records.

    For each path yielded by ``self._grab_files()`` the archive is copied
    into ``self.work_dir`` and expanded there with ``gunzip``, unless the
    expanded file already exists.  Only lines whose characters 4..7 equal
    ``'5010'`` are passed to ``self._handle_record``; ``self.current`` is
    incremented once per handled line.
    """
    for log_gz_file in self._grab_files():
        echo("processing %s" % log_gz_file)
        # Drop the last three characters (presumably ".gz") to get the
        # expanded file's name.
        input_file_base_name = os.path.basename(log_gz_file[:-3])
        new_input_file = os.path.join(self.work_dir, input_file_base_name)
        if not os.path.exists(new_input_file):
            # NOTE(review): the paths are interpolated into the shell
            # command unquoted, so names containing spaces or shell
            # metacharacters will not work.
            os.system('cp %s %s' % (log_gz_file, self.work_dir))
            os.system('cd %s;gunzip %s'
                      % (self.work_dir, input_file_base_name))
        # The context manager closes the file even if a record handler
        # raises part-way through.
        with open(new_input_file, "r") as fd:
            for record in fd:
                if record[4:8] != '5010':
                    continue
                self._handle_record(record)
                self.current += 1
def _process_msg(self, msg):
    """Dispatch one incoming message according to its message type.

    * ``MsgGrabberQuit``: stop the workers.
    * ``MsgWorkerQuit``: flag the sending worker as stopped and set
      ``self.finish`` once no worker is flagged as running any more.
    * ``MsgProgressReport``: forward to ``self._handle_progress_msg``.

    Messages of any other type are ignored.
    """
    kind = msg.get_msgtype()

    if kind == MsgType.MsgGrabberQuit:
        self._stop_workers()
        return

    if kind == MsgType.MsgWorkerQuit:
        sender = msg.get_header().get_sender()
        echo("CalcManager::Notice worker \"%s\" quit" % sender)
        if sender in self.workers_data:
            self.workers_data[sender]['status'] = False
        # A worker counts as running only while its status is exactly True.
        still_running = any(info['status'] is True
                            for info in self.workers_data.values())
        if not still_running:
            echo("CalcManager::All calculation workers quit")
            echo("CalcManager::i quit")
            self.finish = True
        return

    if kind == MsgType.MsgProgressReport:
        self._handle_progress_msg(msg)
def _process_msg(self, msg):
    """Handle a single message received by the calculation manager.

    ``MsgGrabberQuit`` stops the workers; ``MsgWorkerQuit`` marks the
    sender as stopped and raises ``self.finish`` when no worker remains
    flagged as running; ``MsgProgressReport`` is handed to
    ``self._handle_progress_msg``.  Other message types are dropped.
    """
    received_type = msg.get_msgtype()

    if received_type == MsgType.MsgGrabberQuit:
        self._stop_workers()
        return

    if received_type == MsgType.MsgWorkerQuit:
        quitter = msg.get_header().get_sender()
        echo("CalcManager::Notice worker \"%s\" quit" % quitter)
        if quitter in self.workers_data:
            self.workers_data[quitter]['status'] = False
        # Count a worker as alive only when its status is the True object.
        someone_alive = any(entry['status'] is True
                            for entry in self.workers_data.values())
        if not someone_alive:
            echo("CalcManager::All calculation workers quit")
            echo("CalcManager::i quit")
            self.finish = True
        return

    if received_type == MsgType.MsgProgressReport:
        self._handle_progress_msg(msg)
def _final(self):
    """Generate the final report, then run the parent class's ``_final``."""
    echo("going to generate final report ...")
    self._print_report()
    echo("report generation completed")
    # Hand over to the parent implementation for its own finalisation.
    parent = super(PlayUserCalcMgr, self)
    parent._final()
def _final(self):
    """Print the final report and delegate to the parent's ``_final``."""
    echo("going to generate final report ...")
    self._print_report()
    echo("report generation completed")
    # The parent class finishes the remaining finalisation work.
    base = super(TopMediasCalcMgr, self)
    base._final()
def _final(self):
    """Generate the final report, close the DB session, finalise the parent.

    ``self.dbsession`` is closed even when ``self._print_report()``
    raises, so a failed report does not leave the session open.  The
    completion message and the parent's ``_final`` only run when the
    report succeeded.
    """
    echo("going to generate final report ...")
    try:
        self._print_report()
    finally:
        # Release the session on both the success and the failure path.
        self.dbsession.close()
    echo("report generation completed")
    super(AccessUserCalcMgr, self)._final()