def insert_to_mysql_on_duplicate(self, columns_data):
    """Bulk upsert the report rows in ``columns_data`` into self.table_name
    via INSERT ... ON DUPLICATE KEY UPDATE.

    Columns in ``field_exclude`` identify the row and are not rewritten
    on duplicate.  Missing columns fall back to their declared defaults.
    """
    # Guard: with no rows the old code built a malformed statement
    # (' , %s ' * -1 == '' and an empty ON DUPLICATE list).
    if not columns_data:
        Util.printf("insert_to_mysql_on_duplicate: no rows to insert")
        return
    field_exclude = ["uid", "video_id", "is56", "dtime", "play_plat"]
    cmc = ContantsMappingColumn()
    table_field = []
    update_field = []
    datalist = []
    for report in columns_data.values():
        report.setdefault("dtime", self.mydate.get_now())
        report.setdefault("play_plat", 1)
        # Column order is fixed once, from the first report's keys.
        if not table_field:
            for k in report.keys():
                table_field.append(k)
                if k not in field_exclude:
                    update_field.append("%s=values(%s)" % (k, k))
        table_value = []
        for v in table_field:
            table_value.append(report.get(v, cmc.get_default_value(v)))
        datalist.append(tuple(table_value))
    table_field_len = len(table_field)
    sql_head = ' insert into ' + self.table_name
    sql_body = ' ( ' + ','.join(table_field) + ' ) '
    sql_blank = ' , %s ' * (table_field_len - 1)
    sql_tail = ' values ( %s ' + sql_blank + ' ) '
    sql_duplicate = ' ON DUPLICATE KEY UPDATE ' + ','.join(update_field)
    sql = sql_head + sql_body + sql_tail + sql_duplicate
    Util.printf(sql)
    Util.printf("will update rows_num:%d" % len(datalist))
    self.mysql_executemany(sql, datalist)
def run(self):
    # Scheduler thread: pop signals off the queue and spawn one MyProcess
    # per signal; once the queue drains, watch job liveness until done.
    Util.printf("thread running".ljust(50,"."))
    while self.isRun:
        if not self.q.isEmpty():
            Util.printf("proccess running".ljust(50,"."))
            singal = str(self.q.get_singal())
            p = MyProcess(singal,self.path,self.mysqlWrap,self.constants,self.monitor,self.lock)
            # setdefault keeps the first process registered per signal name.
            self.jobs.setdefault(singal,p)
            p.start()
            Util.printf("queque pop %s" % singal)
        else:
            jbs = [job.is_alive() for job in self.jobs.values() ]
            # Stop when every spawned job has finished.
            if jbs.count(False) == self.jobs_size :
                self.isRun = False
            # NOTE(review): this second check stops the loop as soon as
            # jobs_size jobs have been *spawned*, regardless of liveness,
            # making the check above redundant — confirm this is intended.
            if jbs.__len__() == self.jobs_size:
                self.isRun = False
            # When all but one job exist, enqueue the final URL stage.
            if jbs.__len__() > 0 and jbs.__len__() == self.jobs_size - 1:
                self.q.put_singal("URLRequest")
            jps_show = [(name,job.is_alive()) for name,job in self.jobs.items() ]
            Util.printf("current process is_alive:%s" % jps_show)
            time.sleep(2)
    Util.printf("thread stop".ljust(50,"."))
def get_conn_qs(self):
    """Open and return a MySQL connection; log and return None on failure."""
    try:
        return MySQLdb.connect(host=self.host,
                               user=self.user,
                               passwd=self.passwd,
                               db=self.db,
                               charset=self.charset,
                               port=self.port)
    except Exception as e:
        Util.printf(e)
def insert_update_report(self, **args):
    """Upsert report rows into the month-partitioned video_report table.

    Keyword args:
        map_store:     dict of per-video report dicts.
        update_fields: column names rewritten on duplicate key.
    """
    map_store = args.get("map_store")
    update_fields = args.get("update_fields")
    # Guard: empty input previously produced a malformed VALUES clause
    # (' , %s ' * -1 == '').
    if not map_store:
        Util.printf("insert_update_report: map_store is empty, nothing to do")
        return
    table_name = "video_report_%d" % int(
        self.monitorReport.mydate.get_now(format='ym'))
    table_field = []
    datalist = []
    for report in map_store.values():
        # Column order is fixed by the first report's key order.
        if not table_field:
            table_field = list(report.keys())
        datalist.append(tuple([report[v] for v in table_field]))
    update_field = ["%s=values(%s)" % (f, f) for f in update_fields]
    sql_head = ' insert into ' + table_name
    sql_body = ' ( ' + ','.join(table_field) + ' ) '
    sql_blank = ' , %s ' * (len(table_field) - 1)
    sql_tail = ' values ( %s ' + sql_blank + ' ) '
    sql_duplicate = ' ON DUPLICATE KEY UPDATE ' + ','.join(update_field)
    sql = sql_head + sql_body + sql_tail + sql_duplicate
    Util.printf(sql)
    self.mysql_executemany(sql, datalist)
def insert_report(self, map_store):
    """Insert the report rows in ``map_store`` into the current month's
    video_report table (plain INSERT, no duplicate handling)."""
    # Guard: empty input previously produced a malformed VALUES clause
    # (' , %s ' * -1 == '').
    if not map_store:
        Util.printf("insert_report: map_store is empty, nothing to do")
        return
    table_name = "video_report_%d" % int(
        self.monitorReport.mydate.get_now(format='ym'))
    table_field = []
    datalist = []
    for report in map_store.values():
        # Column order is fixed by the first report's key order.
        if not table_field:
            table_field = list(report.keys())
        datalist.append(tuple([report[v] for v in table_field]))
    sql_head = ' insert into ' + table_name
    sql_body = ' ( ' + ','.join(table_field) + ' ) '
    sql_blank = ' , %s ' * (len(table_field) - 1)
    sql_tail = ' values ( %s ' + sql_blank + ' ) '
    sql = sql_head + sql_body + sql_tail
    Util.printf(sql)
    self.mysql_executemany(sql, datalist)
def monitor_exception_count(self, e, isTimeout_retry, url):
    """Classify a parallel-fetch failure: first-pass timeouts are recorded
    in urls_parallel_timeout, retried timeouts in urls_retry_failure.
    Non-timeout errors are only logged."""
    message = str(e)
    if 'timed out' in message or 'Time-out' in message:
        if isTimeout_retry:
            self.monitor.urls_retry_failure.append(url)
        else:
            self.monitor.urls_parallel_timeout.append(url)
    Util.printf('urlopen-->%s | exception--> %s' % (url, e))
def monitor_exception_count(self, e, isTimeout_retry, url):
    # Classify a parallel-fetch failure: first-pass timeouts go to the
    # parallel-timeout list, retried timeouts to the retry-failure list.
    if str(e).find("timed out") > -1 or str(e).find("Time-out") > -1:
        if not isTimeout_retry:
            self.monitor.urls_parallel_timeout.append(url)
        else:
            self.monitor.urls_retry_failure.append(url)
    # Non-timeout exceptions are only logged, never counted.
    Util.printf("urlopen-->%s | exception--> %s" % (url, e))
def proxy(self, path, func):
    """Run the optional preprocess/process/postprocess hooks supplied in the
    ``func`` dict over every line of ``path``.

    process() contract: return 0 to skip a line, -1 to abort the run.
    """
    if not self.file_exist(path, True):
        return
    if 'preprocess' in func:
        self.preprocess = func.get('preprocess')
    if 'process' in func:
        self.process = func.get('process')
    if 'postprocess' in func:
        self.postprocess = func.get('postprocess')
    context = self.preprocess()
    file_object = open(path)
    try:
        for line in file_object:
            try:
                exitcode = self.process(line, context)
                if exitcode == 0:
                    continue
                elif exitcode == -1:
                    return
            except Exception as e:
                Util.printf(e)
                Util.printf("first item:" + line)
                return
        return self.postprocess(context)
    except Exception as e:
        Util.printf(e)
    finally:
        # Always release the file handle (it was leaked on every path).
        file_object.close()
def select_map_store(self):
    """Load today's play_plat=2 rows from the monthly report table and
    return them as a dict keyed by str(video_id)."""
    table = "video_report_%d" % int(self.monitorReport.mydate.get_now(format='ym'))
    # Column list and positions come from the ReportEntity attribute set.
    entity = ReportEntity.ReportEntity()
    fields = []
    field_pos = {}
    for idx, name in enumerate(entity.__dict__):
        fields.append(name)
        field_pos.setdefault(name, idx)
    sql = ' select ' + ','.join(fields)
    sql += ' from ' + table
    sql += ' where dtime=' + self.monitorReport.mydate.get_now() + ' and play_plat=2 '
    Util.printf(sql)
    map_store = {}
    for row in self.mysql_select(sql):
        report = {}
        for name, pos in field_pos.items():
            report.setdefault(name, row[pos])
        video_id = row[field_pos.get('video_id')]
        map_store.setdefault(('%d' % video_id), report)
    return map_store
def insert_to_mysql_default(self, columns_data):
    """Upsert rows built from the full constant column set into
    self.table_name, filling missing values with per-column defaults."""
    field_exclude = ["uid", "video_id", "is56", "dtime", "play_plat"]
    cmc = ContantsMappingColumn()
    contants = cmc.get_unique_value()
    table_field = [k for k in contants]
    table_field.append("dtime")
    table_field.append("play_plat")
    table_field_len = len(table_field)
    # Key columns are excluded from the ON DUPLICATE update list.
    set_diff = contants.difference(set(field_exclude))
    # Was map(lambda ...) used purely for its side effect — a comprehension
    # is the idiomatic form (and keeps working under Python 3's lazy map).
    update_field = ["%s=values(%s)" % (s, s) for s in set_diff]
    datalist = []
    for report in columns_data.values():
        report.setdefault("dtime", self.mydate.get_now())
        report.setdefault("play_plat", 1)
        table_value = [report.get(v, cmc.get_default_value(v)) for v in table_field]
        datalist.append(tuple(table_value))
    sql_head = ' insert into ' + self.table_name
    sql_body = ' ( ' + ','.join(table_field) + ' ) '
    sql_blank = ' , %s ' * (table_field_len - 1)
    sql_tail = ' values ( %s ' + sql_blank + ' ) '
    sql_duplicate = ' ON DUPLICATE KEY UPDATE ' + ','.join(update_field)
    sql = sql_head + sql_body + sql_tail + sql_duplicate
    Util.printf(sql)
    Util.printf("will update rows_num:%d" % len(datalist))
    self.mysql_executemany(sql, datalist)
def query_all_donate(self, path):
    """Aggregate today's donations per (user, video, source), dump the rows
    to ``path`` and push the file's base name onto the signal queue."""
    sql = "SELECT WM_ACCEPT_USER_ID uid,VID vid,IS_FROM_56 is56,sum(WM_GIVE_MONEY) money " \
          " FROM wm_sponsor_record where WM_GIVE_DAY=%s group by WM_ACCEPT_USER_ID,VID,IS_FROM_56"
    Util.printf(sql % self.mydate.get_now())
    rows = self.mysql_select(sql % self.mydate.get_now())
    self.save_rows(rows, path, 4)
    # os.path.basename replaces the old dirname-length slicing, which
    # dropped the first character when path had no directory component.
    self.q.put_singal(os.path.basename(path))
def get_conn(self):
    """Open an Oracle connection ("user/passwd@host/db" DSN); log and
    return None on failure."""
    dsn = '%s/%s@%s/%s' % (self.user, self.passwd, self.host, self.db)
    try:
        return cx_Oracle.connect(dsn)
    except Exception as e:
        Util.printf(e)
def select_map_store(self):
    # Load today's play_plat=2 rows from the monthly report table and
    # return them as a dict keyed by str(video_id).
    map_store = {}
    table = "video_report_%d" % int(
        self.monitorReport.mydate.get_now(format='ym'))
    fields = []
    fiedls_position = {}  # NOTE: name is a typo for "fields_position" (kept as-is)
    # 're' here shadows the stdlib regex module inside this method.
    re = ReportEntity.ReportEntity()
    count = 0
    # Column names and positions come from the ReportEntity attribute set.
    for f in re.__dict__:
        fields.append(f)
        fiedls_position.setdefault(f, count)
        count += 1
    sql = ' select '
    sql += ','.join(fields)
    sql += ' from ' + table
    sql += ' where dtime=' + self.monitorReport.mydate.get_now(
    ) + ' and play_plat=2 '
    Util.printf(sql)
    rows = self.mysql_select(sql)
    for row in rows:
        video_id = row[fiedls_position.get('video_id')]
        report = {}
        for k, v in fiedls_position.iteritems():
            report.setdefault(k, row[v])
        map_store.setdefault(('%d' % video_id), report)
    return map_store
def insert_update_report(self, **args):
    """Insert-or-update report rows in the current month's video_report
    table, rewriting ``update_fields`` on duplicate key."""
    map_store = args.get("map_store")
    update_fields = args.get("update_fields")
    table_name = "video_report_%d" % int(self.monitorReport.mydate.get_now(format='ym'))
    table_field = []
    datalist = []
    for report in map_store.values():
        # Fix column order from the first report seen.
        if not table_field:
            table_field = [k for k in report.keys()]
        datalist.append(tuple([report[col] for col in table_field]))
    update_field = ["%s=values(%s)" % (f, f) for f in update_fields]
    placeholders = ' , %s ' * (len(table_field) - 1)
    parts = [' insert into ' + table_name]
    parts.append(' ( ' + ','.join(table_field) + ' ) ')
    parts.append(' values ( %s ' + placeholders + ' ) ')
    parts.append(' ON DUPLICATE KEY UPDATE ' + ','.join(update_field))
    sql = ''.join(parts)
    Util.printf(sql)
    self.mysql_executemany(sql, datalist)
def monitor_exception_count(self, e, isTimeout_retry, url):
    """Classify a serial-fetch failure: first-pass timeouts are recorded
    in urls_serial_timeout, retried timeouts in urls_retry_failure.
    Everything is logged."""
    if 'urlopen error timed out' in str(e):
        if isTimeout_retry:
            self.monitor.urls_retry_failure.append(url)
        else:
            self.monitor.urls_serial_timeout.append(url)
    Util.printf('urlopen-->%s | exception--> %s' % (url, e))
def save_url_uid(self, map_store):
    """Mark today's play_plat=1 rows as user-shared (is_user_share=2) for
    every uid key in ``map_store``."""
    sql = 'update ' + self.table_name + ' set is_user_share=2 where uid =%s and dtime =%s and play_plat=1 '
    datalist = [(uid, self.mydate.get_now()) for uid in map_store.keys()]
    Util.printf("will update rows_num:%d" % len(datalist))
    self.mysql_executemany(sql, datalist)
def proxy(self, path, func):
    """Run the optional preprocess/process/postprocess hooks from the
    ``func`` dict over every line of ``path``.

    process() contract: return 0 to skip a line, -1 to abort the run.
    """
    if not self.file_exist(path, True):
        return
    # Install whichever hooks the caller supplied.
    for hook in ('preprocess', 'process', 'postprocess'):
        if hook in func:
            setattr(self, hook, func.get(hook))
    context = self.preprocess()
    file_object = open(path)
    try:
        for line in file_object:
            try:
                exitcode = self.process(line, context)
                if exitcode == 0:
                    continue
                elif exitcode == -1:
                    return
            except Exception as e:
                Util.printf(e)
                Util.printf("first item:" + line)
                return
        return self.postprocess(context)
    except Exception as e:
        Util.printf(e)
def measure_time(*args, **kwargs):
    """Decorator inner: time one call of the closed-over ``fn`` and log
    the elapsed seconds before returning its result."""
    started = time.time()
    result = fn(*args, **kwargs)
    elapsed = time.time() - started
    Util.printf('call %s(): took %s seconds' % (fn.__name__, str(elapsed)))
    return result
def mysql_select(self, sql):
    """Execute ``sql`` on a fresh connection and return all rows, or None
    if the query fails (the error is logged)."""
    cursor = self.get_conn_qs().cursor()
    try:
        cursor.execute(sql)
        return cursor.fetchall()
    except Exception as e:
        Util.printf(e)
def mysql_select(self,sql):
    # Execute a read-only query on a fresh connection and return all rows.
    conn = self.get_conn_qs()
    cursor = conn.cursor()
    try:
        n = cursor.execute(sql)  # affected-row count; unused
        return cursor.fetchall()
    except Exception, e:
        # Log and fall through — the caller receives None on failure.
        Util.printf(e)
def oracle_fetchall(self, sql):
    """Execute ``sql`` on a fresh Oracle connection and return every row,
    or None if the query fails (the error is logged)."""
    cursor = self.get_conn().cursor()
    try:
        cursor.execute(sql)
        return cursor.fetchall()
    except Exception as e:
        Util.printf(e)
def save_url_vid(self, map_store):
    """Backfill video metadata columns for today's play_plat=1 rows, one
    UPDATE per video_id in ``map_store``."""
    sql = ('update ' + self.table_name +
           ' set cate_code=%s,play_type=%s,video_play_time=%s,video_title=%s,'
           'is_video_share=%s where video_id=%s and dtime=%s and play_plat=1')
    datalist = []
    for vid, info in map_store.items():
        row = (info.get("cate_code"), info.get("play_type"),
               info.get("video_play_time"), info.get("video_title"),
               info.get("is_video_share"), vid, self.mydate.get_now())
        datalist.append(row)
    Util.printf("will update rows_num:%d" % len(datalist))
    self.mysql_executemany(sql, datalist)
def mysql_execute_new(self, sql, datalist):
    """Execute ``sql`` once per row in ``datalist``, committing after each
    statement and logging the interpolated SQL with its row count."""
    conn = self.get_conn_qs()
    cursor = conn.cursor()
    try:
        for data in datalist:
            params = tuple(data)
            shown_sql = sql % params  # for logging only; execution is parameterized
            affected = cursor.execute(sql, params)
            conn.commit()
            Util.printf('n=%s | sql=%s' % (affected, shown_sql))
    except Exception as e:
        Util.printf(e)
def mysql_execute_new(self, sql, datalist):
    # Execute sql once per data row, committing after each statement.
    conn = self.get_conn_qs()
    cursor = conn.cursor()
    try:
        for data in datalist:
            # Interpolated copy is for logging only; execution stays parameterized.
            tmpsql = sql % tuple(data)
            n = cursor.execute(sql, tuple(data))
            conn.commit()
            Util.printf('n=%s | sql=%s' % (n, tmpsql))
    except Exception, e:
        # First failure aborts the remaining rows; already-committed rows stand.
        Util.printf(e)
def sub_job_join(self):
    # Block the main process until every child job has been spawned
    # (jobs dict is full), then join the live ones and return.
    # While jobs are still being created, poll every 10 seconds.
    while True:
        if self.jobs.__len__() == self.jobs_size:
            for k,job in self.jobs.items() :
                if job.is_alive():
                    Util.printf(("%s proccess join".ljust(50,".")) % k)
                    job.join()
            break
        else:
            Util.printf("main proccess sleeping".ljust(50,"."))
            time.sleep(10)
def read_keyvalue(self, path):
    """Parse ``path`` as whitespace-separated key/value lines and return a
    dict (first occurrence of a key wins).

    Lines with fewer than two fields are skipped.  Returns None if reading
    fails (the error is logged).
    """
    update_map = {}
    self.file_exist(path)
    file_object = open(path)
    try:
        for line in file_object:
            kvs = line.split()
            if len(kvs) < 2:
                continue
            update_map.setdefault(kvs[0], kvs[1])
        return update_map
    except Exception as e:
        Util.printf(e)
    finally:
        # Close the handle on every path (it was previously leaked).
        file_object.close()
def update_mysql_field(self, *args):
    """args = (update_map, update_field, uids): set column ``update_field``
    for each video_id in update_map on today's play_plat=2 rows."""
    update_map, update_field, uids = args[0], args[1], args[2]
    table = "video_report_%d" % int(self.monitorReport.mydate.get_now(format='ym'))
    sql = ("update %s set %s" % (table, update_field)
           + "=%s where dtime=%s and play_plat=2 and video_id=%s and uid=%s")
    Util.printf(sql)
    datalist = [
        (value, self.monitorReport.mydate.get_now('-'), vid, uids.get(vid, 0))
        for vid, value in update_map.items()
    ]
    self.mysql_executemany(sql, datalist)
def update_mysql_field(self, *args):
    # args: (update_map, update_field, uids) — bulk-update one column on
    # today's play_plat=2 rows, keyed by video_id (+ uid when known).
    update_map = args[0]
    update_field = args[1]
    uids = args[2]
    table = "video_report_%d" % int(
        self.monitorReport.mydate.get_now(format='ym'))
    base_sql = "update %s set %s" % (table, update_field)
    sql = base_sql + "=%s where dtime=%s and play_plat=2 and video_id=%s and uid=%s"
    Util.printf(sql)
    datalist = []
    for k, v in update_map.items():
        # uid falls back to 0 when the video has no known owner.
        datalist.append(
            (v, self.monitorReport.mydate.get_now('-'), k, uids.get(k, 0)))
    self.mysql_executemany(sql, datalist)
def mysql_executemany(self, sql, datalist):
    """Run ``sql`` via executemany in 500-row batches, committing after
    each batch; failures are logged with a traceback."""
    conn = self.get_conn_qs()
    cursor = conn.cursor()
    try:
        # Stride through the data in clean 500-row slices.  The old
        # range(len+1)/modulo walk issued a spurious empty executemany
        # whenever the row count was a multiple of 500 (including 0 rows).
        for i in range(0, len(datalist), 500):
            n = cursor.executemany(sql, datalist[i:i + 500])
            Util.printf(n)
            conn.commit()
    except Exception as e:
        Util.printf(e)
        import traceback
        traceback.print_exc()
class FileBase(object):
    """Base class implementing a pluggable line-by-line file processor."""

    def __init__(self):
        pass

    def proxy(self, path, func):
        """Run optional preprocess/process/postprocess hooks (supplied via
        the ``func`` dict) over every line of ``path``.

        process() contract: return 0 to skip a line, -1 to abort the run.
        Per-line errors are logged and the line is skipped.
        """
        if not self.file_exist(path, True):
            return
        if 'preprocess' in func:
            self.preprocess = func.get('preprocess')
        if 'process' in func:
            self.process = func.get('process')
        if 'postprocess' in func:
            self.postprocess = func.get('postprocess')
        context = self.preprocess()
        file_object = open(path)
        try:
            for line in file_object:
                try:
                    exitcode = self.process(line, context, path)
                    if exitcode == 0:
                        continue
                    elif exitcode == -1:
                        return
                except Exception as e:
                    Util.printf(e)
            return self.postprocess(context)
        except Exception as e:
            Util.printf(e)
        finally:
            # Always release the file handle (it was leaked on every path).
            file_object.close()
def mysql_executemany(self, sql, datalist):
    """Run ``sql`` via executemany in 500-row batches, logging a
    remaining/committed progress line and committing after each batch;
    failures are logged with a traceback."""
    conn = self.get_conn_qs()
    cursor = conn.cursor()
    try:
        total = len(datalist)
        # Stride through the data in clean 500-row slices.  The old
        # range(len+1)/modulo walk issued a spurious empty executemany
        # whenever the row count was a multiple of 500 (including 0 rows).
        for i in range(0, total, 500):
            n = cursor.executemany(sql, datalist[i:i + 500])
            # Clamp so the final partial batch never logs a negative count.
            data_leave = max(total - i - 500, 0)
            prefix = str("data_leave:%s" % data_leave).ljust(20)
            postfix = str("| commit_already:%s" % n).ljust(20)
            Util.printf(prefix + postfix)
            conn.commit()
    except Exception as e:
        Util.printf(e)
        import traceback
        traceback.print_exc()
def insert_report(self, map_store):
    """Insert the report rows in ``map_store`` into the current month's
    video_report table (plain INSERT)."""
    table_name = "video_report_%d" % int(self.monitorReport.mydate.get_now(format='ym'))
    table_field = []
    datalist = []
    for report in map_store.values():
        # Fix column order from the first report seen.
        if not table_field:
            table_field = [k for k in report.keys()]
        datalist.append(tuple([report[col] for col in table_field]))
    placeholders = ' , %s ' * (len(table_field) - 1)
    sql = (' insert into ' + table_name
           + ' ( ' + ','.join(table_field) + ' ) '
           + ' values ( %s ' + placeholders + ' ) ')
    Util.printf(sql)
    self.mysql_executemany(sql, datalist)
def run(self):
    # Worker process entry point.  The special "URLRequest" signal runs the
    # URL-monitoring stage (serial + parallel JSON reporting); any other
    # signal names a data file to ingest under the inter-process lock.
    if self.singal == "URLRequest":
        dataMonitor = DataMonitor(self.path,self.constants,self.monitor)
        dataMonitor.init_monitor()
        self.monitor.monitor_start(self.path+"/monitor.log")
        log = Log(self.path+"/url_error.log")
        log.begin_log()
        ReportJsonSerial(self.monitor).json_start()
        ReportJsonParallel(self.monitor).json_start()
        # Give the reporters a moment to flush before closing the log.
        time.sleep(5)
        log.end_log()
    else:
        reportReadFile = ReportReadFile(self.path,self.singal,self.constants,self.monitor)
        self.lock.acquire()
        Util.printf("lock acquire".ljust(50,"."))
        reportReadFile.read_file()
        self.lock.release()
        Util.printf("lock release".ljust(50,"."))
    Util.printf("proccess stop".ljust(50,"."))
def write_rows_file(self, path):
    """Aggregate today's mobile-PGC ad metrics per video from Oracle
    (rows whose sver starts with a digit >= 5) and dump them to ``path``."""
    sql = ("select v_id,sum(max_adv),sum(uv),sum(view_all_uv),sum(stock),"
           "sum(occu),sum(pos1),sum(pos2),sum(pos3),sum(pos4),sum(pos5),"
           "sum(t_occu) from dwpdata.core_mobile_pgc where sver is not null "
           "and sver <> 'null' and regexp_like(sver, '^\\d') and "
           "substr(sver,0,1)>=5 and data_date=%s group by v_id ")
    day = self.monitorReport.mydate.get_now()
    Util.printf(sql % day)
    rows = self.oracle_fetchall(sql % day)
    self.save_rows(rows, path)
def hive_shell_wrap(self, sqls):
    """Split a "sql&logfile" pair and dispatch it to hive_shell_exe.

    ``sqls`` must contain the query and the log path separated by '&'.
    """
    parts = sqls.split("&")
    # Guard: a missing '&' separator used to raise IndexError on parts[1].
    if len(parts) < 2:
        Util.printf('hive_shell_wrap: malformed input (expected "sql&log"): %s' % sqls)
        return
    Util.printf('sql=%s | log=%s \n' % (parts[0], parts[1]))
    self.hive_shell_exe(str(parts[0]), parts[1])
def write_rows_file(self, path):
    """Aggregate today's mobile-PGC metrics (sver >= 5) per video from
    Oracle and dump the rows to ``path``."""
    sql = 'select v_id,sum(max_adv),sum(uv),sum(view_all_uv),sum(stock),sum(occu),sum(pos1),sum(pos2),sum(pos3),sum(pos4),sum(pos5),sum(t_occu) from dwpdata.core_mobile_pgc where sver is not null and substr(sver,0,1)>=5 and data_date=%s group by v_id '
    # Use one date source for both the logged and the executed SQL;
    # previously the log used self.monitorReport.mydate while the query
    # used self.mydate, so the logged statement could differ from the one
    # actually run (sibling write_rows_file variants all use monitorReport).
    day = self.monitorReport.mydate.get_now()
    Util.printf(sql % day)
    rows = self.oracle_fetchall(sql % day)
    self.save_rows(rows, path)
def write_rows_file(self, path):
    # Aggregate today's mobile-PGC ad metrics per video from Oracle and
    # dump them to ``path``.  Only rows with a numeric sver >= 5 qualify.
    #sql = 'select v_id,max_adv,uv,view_all_uv,stock,occu,pos1,pos2,pos3,pos4,pos5,t_occu from dwpdata.core_mobile_pgc where sver is not null and substr(sver,0,1)>=5 and data_date=%s'
    sql = "select v_id,sum(max_adv),sum(uv),sum(view_all_uv),sum(stock),sum(occu),sum(pos1),sum(pos2),sum(pos3),sum(pos4),sum(pos5),sum(t_occu) from dwpdata.core_mobile_pgc where sver is not null and sver <> 'null' and regexp_like(sver, '^\d') and substr(sver,0,1)>=5 and data_date=%s group by v_id "
    Util.printf(sql % self.monitorReport.mydate.get_now())
    rows = self.oracle_fetchall(sql % self.monitorReport.mydate.get_now())
    self.save_rows(rows, path)
def wrapper(*args, **kw):
    """Decorator inner: log the wrapped function's name, then delegate
    to it unchanged."""
    name = func.__name__
    Util.printf('call %s():' % name)
    return func(*args, **kw)
def check_required(self, func):
    """Placeholder decorator hook: log the check and return ``func``
    unmodified."""
    Util.printf('check some condition')
    return func
def get_conn(self):
    # Build an Oracle connection from "user/passwd@host/db" DSN parts.
    try:
        conn = cx_Oracle.connect('%s/%s@%s/%s' % (self.user, self.passwd, self.host, self.db))
        return conn
    except Exception, e:
        # On failure only log; the caller receives None.
        Util.printf(e)
def check_required(self,func):
    # Placeholder decorator hook: log the check and hand func back untouched.
    Util.printf('check some condition')
    return func
def measure_time(*args,**kwargs):
    # Decorator inner: time one call of the closed-over ``fn`` and log
    # the elapsed seconds before returning its result.
    t1=time.time()
    result=fn(*args,**kwargs)
    t2=time.time()
    Util.printf('call %s(): took %s seconds' % (fn.__name__,str(t2-t1)))
    return result
def hive_shell_wrap(self, sqls):
    """Split a "sql&logfile" pair and dispatch it to hive_shell_exe."""
    pair = sqls.split("&")
    query, logfile = pair[0], pair[1]
    Util.printf('sql=%s | log=%s \n' % (query, logfile))
    self.hive_shell_exe(str(query), logfile)
def write_rows_file(self, path):
    """Dump per-video stock/occupancy/vv aggregates for today from Oracle
    to ``path``."""
    sql = ('select v_id, sum(allstock),sum(stock),SUM(alloccu),sum(alltoccu),'
           'sum(occu),sum(t_occu),sum(vv),sum(allvv) from dwpdata.core_mobile_pgc '
           'where data_date=%s group by v_id ')
    day = self.monitorReport.mydate.get_now()
    Util.printf(sql % day)
    self.save_rows(self.oracle_fetchall(sql % day), path)
def write_rows_file(self, path):
    """Aggregate today's mobile-PGC metrics (sver >= 5) per video from
    Oracle and dump the rows to ``path``."""
    sql = 'select v_id,sum(max_adv),sum(uv),sum(view_all_uv),sum(stock),sum(occu),sum(pos1),sum(pos2),sum(pos3),sum(pos4),sum(pos5),sum(t_occu) from dwpdata.core_mobile_pgc where sver is not null and substr(sver,0,1)>=5 and data_date=%s group by v_id '
    # Use one date source for both the logged and the executed SQL;
    # previously the log used self.monitorReport.mydate while the query
    # used self.mydate, so the logged statement could differ from the one
    # actually run (sibling write_rows_file variants all use monitorReport).
    day = self.monitorReport.mydate.get_now()
    Util.printf(sql % day)
    rows = self.oracle_fetchall(sql % day)
    self.save_rows(rows, path)