def set(gkey,data):
    """Upload values for a group.

    Looks up each item's id in the redis cache first, then matches the
    itemkeys and inserts the rows into the history table.

    Parameters:
        gkey: gkey of the group.
        data: dataset, preferably no more than 100 entries per call:
              {'itemkey': [value, creation time (defaults to the current
              timestamp)]}

    Returns (res, desc) from the insert on success, or a [code, message]
    list on failure.  NOTE(review): this module-level name shadows the
    builtin ``set``.
    """
    stat_db = mysqldb.get_db()
    rdb = rediswrap.get_redis()
    kys = list(data.keys())
    # append the table-marker field so the target history table can be resolved
    kys.append('mrk')
    itm = rdb.hmget(RD_ITM_KEY_PRFX+gkey,kys)
    itm_dict = {}  # NOTE(review): never used afterwards
    if itm:
        # last element corresponds to 'mrk': "<items_mrk>,<history_mrk>"
        mrk=itm.pop().split(',')
        hst_tb = get_hst_name(mrk[1])
        value = []
        # after the pop, itm[i] pairs with kys[i] (the original data keys)
        for i in range(0,len(itm)):
            if itm[i] :
                tmp = data[kys[i]]
                # use the supplied clock, or "now" when it is falsy
                clock = tmp[1] if tmp[1] else int(time.time())
                value.append("('%s','%s','%s')" %(itm[i],tmp[0],clock))
        if value:
            # NOTE(review): SQL built by string interpolation; values are not
            # escaped -- assumes trusted callers.
            res,desc = stat_db.query("insert into %s(itemid,val,clock)values%s" % (hst_tb,','.join(value)),1)
            if res ==-1 :
                return [res,str(desc)]
            return res,desc
        else:
            return [-1,"no items find."]
    return [-2,'items not in redis.']
def get(gkey,itm=None,start_time=0,stop_time=0,sort='clock asc',groupby=0,page=None):
    """Fetch statistics records for a group.

    Parameters:
        gkey: gkey of the group.
        itm: list of item keys; when empty/None, records of the whole
             group are fetched.  (Fixed: the default was the mutable
             literal ``itm=[]`` which the function also mutated by
             appending 'mrk' -- so the shared default grew on every call
             and callers' own lists were corrupted.)
        start_time: start timestamp (defaults to the start of today).
        stop_time: stop timestamp (defaults to now).
        sort: ORDER BY fragment.
        groupby: 0 = none, 1 = by itemid, 2 = by clock, other = itemid,clock
                 (grouped queries also switch to sum(val) aggregation).
        page: paging parameters {'site': rows per page, 'num': page number};
              all records are returned by default.

    Returns (0, [item_label_map, rows]); (0, [{}, []]) when nothing matched.
    """
    stat_db = mysqldb.get_db()
    rdb = rediswrap.get_redis()
    sql_item = {'fields':'*'}
    r_itmkey = RD_ITM_KEY_PRFX+gkey
    if itm:
        # copy before appending 'mrk' so the caller's list stays untouched
        lookup = list(itm) + ['mrk']
        itmids = rdb.hmget(r_itmkey,lookup)
        # last element corresponds to 'mrk': "<items_mrk>,<history_mrk>"
        mrk = itmids.pop().split(',')
    else:
        mrk = rdb.hget(r_itmkey,'mrk')
        itmids = rdb.hvals(r_itmkey)
        itmids.remove(mrk)
        mrk = mrk.split(',')
    ids = [k for k in itmids if k]
    sql_item['table'] = get_hst_name(mrk[1])
    sql_item['where'] = " itemid in (%s) " % ",".join(ids)
    start_time = utils.timestamp(start_time) if start_time else utils.timestamp(0,'d')
    stop_time = utils.timestamp(stop_time) if stop_time else int(time.time())
    sql_item['where'] += " and clock>=%s and clock <%s" % (start_time,stop_time)
    sql_item['order'] = sort
    if groupby:
        if groupby ==1:
            sql_item['group'] = 'itemid'
        elif groupby == 2:
            sql_item['group'] = 'clock'
        else:
            sql_item['group'] = 'itemid,clock'
        sql_item['fields'] = "itemid,sum(val) as val,clock"
    # paging -- marked as provisional in the original
    if page:
        s = page['num']*page['site']
        sql_item['limit'] = "%s,%s" %(s,page['site'])
    res,desc = stat_db.query(sql_item)
    # resolve item display names for the returned ids
    item_lab = {}
    if res == 0 and desc:
        itm_tb = "stat_item_" + mrk[0] if mrk[0] else "stat_item"
        rs, ds = stat_db.query("select name,id from %s where id in(%s)" %(itm_tb,",".join(ids) ))
        if rs==0 and ds:
            for row in ds :
                item_lab[row['id']]=row['name']
        return 0,[item_lab,desc]
    return 0,[{},[]]
def gettags(kw):
    """Tag lookup with caching.  Tag cache key: biz:prod:targ

    test:
        curl -l -H "Content-Type: application/json" -X POST
             -d '{"biznum":"tags.corp","tag_id":101,"lv2rows":13,"lv3rows":1}'
             http://192.168.10.126:6000/biz_tags/gettags

    Parameters (read from the `kw` dict):
        biznum: business configuration identifier (required).
        tag_id: parent tag id.
        lv2rows: number of level-2 tags to extract.
        lv3rows: number of level-3 tags to extract.
    """
    biznum = kw.get('biznum','')
    sort = ""
    tag_id = str(kw.get('tag_id',""))
    # tag ids are 3/6 digits per hierarchy level; shorter means "all tags"
    tag_id_len = 0 if len(tag_id) < 3 else len(tag_id)
    lv2rows = kw.get('lv2rows',0)
    lv3rows = kw.get('lv3rows',0)
    if not biznum:
        return [-6,'parameter biznum not set.']
    bizcnf = get_cnf_val(biznum,sconf.BIZ) # load the business configuration
    if not bizcnf:
        return sconf.err_handle.biznum_not_config
    ttl = bizcnf.get('ttl',600)
    isort = bizcnf.get('sort','total_num')  # NOTE(review): unused below
    # fetch data: per-request result cache first ...
    rdb = rediswrap.get_redis('cache')
    ckey = "%s.%s%s%s%s" %(biznum,sort,tag_id,lv2rows,lv3rows)
    res = rdb.get(ckey)
    if res:
        return 0,json.loads(res)
    # ... then the full tag-tree cache, rebuilt via prod_targs on miss
    res = rdb.get("biz:targ.%s"%bizcnf['table'])
    if not res:
        s,res = tagsclass.prod_targs(biznum)
    else:
        res = json.loads(res)
    # organise the data
    # tag_id of level 1
    data={}
    if tag_id_len ==3 and tag_id in res and 'extn' in res[tag_id]:
        data = res[tag_id]
    elif tag_id_len ==6:
        # NOTE(review): can raise KeyError when the parent or child key is
        # absent from the cached tree -- no guard like the level-1 branch has.
        data = res[str(tag_id)[:3]]['extn'][tag_id]
    # tag_id of all
    elif not tag_id:
        data={'info':{'tag_id':0}}
        data['extn'] = res
    if data and 'extn' in data:
        res = tag_recursion(data,lv2rows,lv3rows)
        # NOTE(review): argument order (key, value, ttl) matches the legacy
        # redis-py Redis.setex signature; StrictRedis expects (key, ttl,
        # value) -- confirm which client rediswrap wraps.
        rdb.setex(ckey,json.dumps(res),ttl)
        return 0,res
    else:
        return 0,{}
def prod_targs(key): bizcnf = get_cnf_val(key,sconf.BIZ) #取业务配置 if not bizcnf: return sconf.err_handle.biznum_not_config ttl = bizcnf.get('ttl',600) #取数据库配置 dbinfo = get_host_by_data(bizcnf['source']) if not dbinfo: return sconf.err_handle.db_not_config dbinfo['dbname']=bizcnf['source'].split('.')[-1] rdb = rediswrap.get_redis('cache') rkey = "biz:targ.%s"%bizcnf['table'] db = dbclass(dbinfo) res,desc = db.connect() if res == -1: return sconf.err_handle.db_err sql_item={} sql_item['table'] = bizcnf['table'] sql_item['fields'] = bizcnf['fields'] sql_item['order'] = "tag_id asc" res,desc = db.query(sql_item) if res == -1: return sconf.err_handle.db_err if desc: obj = {} for row in desc: tid = str(row['tag_id']) if len(tid) ==3: obj[row['tag_id']]={"info":row} elif len(tid) ==6: pid = int(tid[:3]) if 'extn' not in obj[pid]: obj[pid]['extn']={} obj[pid]['extn'][row['tag_id']]={"info":row} elif len(tid)==9: ppid = int(tid[:3]) pid = int(tid[:6]) if 'extn' not in obj[ppid]['extn'][pid]: obj[ppid]['extn'][pid]['extn']={} obj[ppid]['extn'][pid]['extn'][row['tag_id']]={"info":row} #try: #rdb.set(rkey,gzip.compress(json.dumps(res).encode())) rdb.set(rkey,json.dumps(obj)) #except: # pass return 0,obj
def reg_items2redis(gkey,itm=None):
    """Store a group's items in the redis cache.

    Parameters:
        gkey: gkey of the group.
        itm: optional list of item keys; when given, only those items are
             looked up (in slices of 50), otherwise every item of the
             group is paged through by id.  (Fixed: the default was the
             mutable literal ``[]``.)
    """
    if itm is None:
        itm = []
    rdb = rediswrap.get_redis()
    stat_db = mysqldb.get_db()
    # NOTE(review): return value unused; kept in case get_hash has side
    # effects -- confirm against rediswrap.
    hash_tb = rediswrap.get_hash(gkey, system='default',serialized_type='string')
    res,desc = get_groups([gkey])
    if res==0 and desc:
        itm_tb = get_itm_name(desc[0]['items_mrk'])
        # record the table markers once (hsetnx: only when not already present)
        rdb.hsetnx(RD_ITM_KEY_PRFX+gkey,'mrk',"%s,%s" %(desc[0]['items_mrk'],desc[0]['history_mrk']))
        gid = desc[0]['gid']
        i=0
        # itemkey list supplied: resolve ids in batches of 50
        if itm:
            itm_len = len(itm)
            while True:
                j = i+50 if i+50 < itm_len else itm_len
                if i == itm_len:
                    break
                key = ['"%s"' % k for k in itm[i:j]]
                sql = "select id,itemkey from %s where itemkey in (%s)" % (itm_tb, ",".join(key))
                res,desc = stat_db.query(sql)
                new_dict = {}
                if res ==0 and desc:
                    for row in desc:
                        new_dict[row['itemkey']]=row['id']
                    rs = rdb.hmset(RD_ITM_KEY_PRFX+gkey,new_dict)
                i = j
        # no itemkey list: page through all items of the group by id
        else:
            res, desc = stat_db.query("select min(id) as mnid,max(id) as mxid from %s where gid = %s" % (itm_tb,gid))
            mnid = desc[0]['mnid']
            mxid = desc[0]['mxid']
            limit = 50
            while True:
                if mnid == mxid:
                    break
                sql = "select id,itemkey from %s where id > %s and gid=%s limit %s" % (itm_tb,mnid,gid,limit)
                res,desc = stat_db.query(sql)
                new_dict = {}
                if res ==0 and desc:
                    for row in desc:
                        new_dict[row['itemkey']]=row['id']
                        # advance the cursor to the last id seen
                        mnid = row['id']
                    rdb.hmset(RD_ITM_KEY_PRFX+gkey,new_dict)
                else:
                    # guard added: a failed or empty query left mnid unchanged
                    # and span an infinite loop
                    break
def update_item_key(gkey,oldkey,newkey):
    """Rename an itemkey both in the items table and in the redis cache.

    Parameters:
        gkey: gkey of the group the item belongs to.
        oldkey: current itemkey.
        newkey: replacement itemkey.

    Returns (res, desc)/(res, des) from the underlying queries, or an
    err_handle value on database failure.
    """
    stat_db = mysqldb.get_db()
    res,desc = get_groups([gkey])
    rdb = rediswrap.get_redis()
    if res==0 and desc:
        itm_tb = get_itm_name(desc[0]['items_mrk'])
        res,des = stat_db.update(itm_tb,{"itemkey":newkey},"itemkey='%s'" % oldkey)
        if res ==0 and des:
            # mirror the rename in the redis hash (local renamed from `id`,
            # which shadowed the builtin)
            item_id = rdb.hget(RD_ITM_KEY_PRFX+gkey,oldkey)
            if item_id :
                # NOTE(review): the old field is never hdel'd, so both the old
                # and the new key remain in the hash -- confirm intent.
                rdb.hset(RD_ITM_KEY_PRFX+gkey,newkey,item_id)
        elif res == -1:
            # fixed: was bare `err_handle.db_err` (a NameError) -- every other
            # function in this module reaches it through sconf
            return sconf.err_handle.db_err
        return res,des
    return res,desc
def send(gkey,data):
    """Accumulate statistic values into the group's redis staging hash.

    Each itemkey in `data` is resolved to its cached item id, and the
    matching counter in the temporary history hash is incremented by the
    supplied value.  Keys with no cached id are reported back.

    Returns [0, updated_count, missing_keys], or [-7, message] when the
    id lookup yields nothing at all.
    """
    rdb = rediswrap.get_redis()
    item_keys = list(data.keys())
    item_ids = rdb.hmget(RD_ITM_KEY_PRFX + gkey, item_keys)
    if not item_ids:
        return [-7,"items not find in redis."]
    updated = 0
    missing = []
    tmp_hash = RD_ITM_HST_TMP_PRFX + gkey
    for itemkey, itemid in zip(item_keys, item_ids):
        if itemid:
            rdb.hincrby(tmp_hash, itemid, data[itemkey])
            updated += 1
        else:
            missing.append(itemkey)
    return [0, updated, missing]
def sphinx2redis(cnfkey): """通过配置文件,获取统计数据 """ #取业务配置 bizcnf = get_cnf_val(cnfkey,sconf.BIZ) if not bizcnf: return sconf.err_handle.biznum_not_config rdb = rediswrap.get_redis('cache') for i in range(0,len(bizcnf['prama'])): itm = bizcnf['prama'][i] source = itm['source'].split('.') if source[1] == 'sphinx': host_info = get_host_by_data(itm['source']) if not host_info : return sconf.sphinx_index_not_found sp = sphinx(host_info['host'],host_info['port']) expression = itm['expression'] expression['index'] = source[2] total_found = 0 while True: #if total_found >0: # if expression['pageSize'] * expression['page'] >=total_found: # break # expression['page'] +=1 sp.initQuery(itm['expression']) rs = sp.RunQueries() pprint(rs) if rs and rs[0]['status']==0: total_found = rs[0]['total_found'] _items = {} for row in rs[0]['matches']: if itm['key'] in row['attrs'] and itm['value'] in row['attrs']: _items[row['attrs'][itm['key']]]=row['attrs'][itm['value']] if _items: print(_items) #res = rdb.mset(cnfkey,_items) else: print(sp._error) break break
def del_redis_items(gkey):
    """Drop a group's cached item hash from redis.

    gkey: group key whose item cache entry is removed.
    """
    rediswrap.get_redis().delete(RD_ITM_KEY_PRFX + gkey)
def mod_redis_mrk(gkey,items_mrk,history_mrk):
    """Change the item's items_mrk/history_mrk marker field in redis.

    NOTE(review): hsetnx only writes the field when it does NOT already
    exist, so despite the "mod" name this cannot change an existing 'mrk'
    value (hset would) -- confirm intent.
    """
    rdb = rediswrap.get_redis()
    rdb.hsetnx(RD_ITM_KEY_PRFX+gkey,'mrk',"%s,%s" %(items_mrk,history_mrk))
def cache_set(key,val,ttl=0):
    """Gzip-compress a string value and store it in the cache redis with a TTL.

    Parameters:
        key: cache key.
        val: str payload; encoded to bytes and gzip-compressed before storage.
        ttl: lifetime in seconds.  NOTE(review): redis rejects SETEX with a
             TTL of 0, so the default looks unusable -- confirm rediswrap's
             behaviour.

    NOTE(review): the (key, value, ttl) argument order matches the legacy
    redis-py Redis.setex signature; StrictRedis expects (key, ttl, value) --
    confirm which client rediswrap wraps.
    """
    rdb = rediswrap.get_redis('cache')
    val = gzip.compress(val.encode())
    rdb.setex(key,val,ttl)
def cache_get(key):
    """Fetch a gzip-compressed cache entry and return it as a str.

    Returns None when the key is absent.  (Fixed: previously a cache miss
    made gzip.decompress(None) raise TypeError, since rdb.get returns
    None for missing keys.)
    """
    rdb = rediswrap.get_redis('cache')
    raw = rdb.get(key)
    if raw is None:
        return None
    return gzip.decompress(raw).decode()
def gettags_base(kw):
    """Tag lookup straight from the database.  Tag cache key: biz:prod:targ

    Builds the nested tag tree for the requested parent tag (tree levels
    are 3/6/9-digit tag ids), after checking a gzip'd redis cache entry.

    Parameters (read from the `kw` dict):
        biznum: business configuration identifier (required).
        sort: cache-key sort discriminator (default 'pro').
        tag_id: parent tag id (0 = all).
        lv2rows / lv3rows: extraction counts, defaulting to the business
        configuration's values.
    """
    biznum = kw.get('biznum','')
    sort = kw.get('sort','pro')
    tag_id = int(kw.get('tag_id',0))
    tag_id_len = 0 if len(str(tag_id)) < 3 else len(str(tag_id))
    if not biznum:
        return [-6,'parameter biznum not set.']
    bizcnf = get_cnf_val(biznum,sconf.BIZ) # load the business configuration
    if not bizcnf:
        return sconf.err_handle.biznum_not_config
    lv2rows = kw.get('lv2rows',bizcnf['lv2rows'])
    lv3rows = kw.get('lv3rows',bizcnf['lv3rows'])
    # fixed: were `kw.get(bizcnf['ttl'],600)` / `kw.get(bizcnf['sort'],...)`,
    # which raise KeyError when the config omits the keys; gettags() shows
    # the intended form.
    ttl = bizcnf.get('ttl',600)
    isort = bizcnf.get('sort','total_num')
    # resolve database connection info from the data-source name
    dbinfo = get_host_by_data(bizcnf['source'])
    if not dbinfo:
        return sconf.err_handle.db_not_config
    dbinfo['dbname']=bizcnf['source'].split('.')[-1]
    rdb = rediswrap.get_redis('cache')
    res = rdb.get("%s.%s%s%s%s" %(biznum,sort,tag_id,lv2rows,lv3rows))
    if res:
        # NOTE(review): cache hits return raw decompressed bytes instead of the
        # (code, data) shape every other path uses -- confirm callers.
        return gzip.decompress(res)
    db = dbclass(dbinfo)
    res,desc = db.connect()
    if res == -1:
        return sconf.err_handle.db_err
    sql_item={}
    sql_item['table'] = bizcnf['table']
    sql_item['fields'] = bizcnf['fields']
    # select the tag itself plus its two descendant ranges (each level adds
    # three digits to the id)
    if tag_id_len ==3:
        sql_item['where'] = "tag_id = %s or (tag_id>%s000 and tag_id <%s000) or (tag_id>%s000000 and tag_id<%s000000) " % (tag_id,tag_id,(tag_id+1),tag_id,(tag_id+1))
    elif tag_id_len >=6:
        sql_item['where'] = "tag_id = %s or (tag_id>%s000 and tag_id <%s000)" % (tag_id,tag_id,(tag_id+1))
    sql_item['order'] = "tag_id asc"
    pprint.pprint(kw)
    pprint.pprint(sql_item)
    res,desc = db.query(sql_item)
    if res == -1:
        return sconf.err_handle.db_err
    if desc:
        # when no parent was given, the root level is the 3-digit tags
        tag_id_len = 3 if tag_id_len==0 else tag_id_len
        obj = {}
        for row in desc:
            tid = str(row['tag_id'])
            if len(tid)-tag_id_len ==0:
                obj[row['tag_id']]={"info":row}
            elif len(tid) - tag_id_len ==3:
                pid = int(tid[:tag_id_len])
                # NOTE(review): this nests children differently from
                # prod_targs ('extn' seeded with an "info" entry, children
                # stored as bare rows) -- confirm which shape is intended.
                if 'extn' not in obj[pid]:
                    obj[pid].update({'extn':{"info":row}})
                obj[pid]['extn'].update({row['tag_id']:row})
            elif len(tid) - tag_id_len==6:
                ppid = int(tid[:tag_id_len])
                pid = int(tid[:tag_id_len+3])
                if 'extn' not in obj[ppid]['extn'][pid]:
                    obj[ppid]['extn'][pid].update({'extn':{"info":row}})
                obj[ppid]['extn'][pid].update({row['tag_id']:row})
        #pprint.pprint(obj)
        return 0,obj
    # fixed: previously fell off the end (returned None) when the query
    # yielded no rows; mirror gettags()'s empty result shape
    return 0,{}
def sumdelay():
    """ Scan the statistics caches in redis and, according to
    stat_item_group.sumdelay, write the qualifying cached statistics into
    the history tables and clear the cache.
    """
    rdb = rediswrap.get_redis()
    db = mysqlwrap.get_db()
    now = time.localtime()
    fields = rdb.keys(stat_base.RD_ITM_HST_TMP_PRFX+'*')
    if fields:
        # staging keys end in ":<gkey>"
        gkeys = [f.split(':')[-1] for f in fields]
        todo_gkeys=[]  # NOTE(review): only referenced from commented-out code
        step = 20
        # initial clock: two minutes in the past
        clock = int(time.time())-120
        for i in range(0,len(gkeys),step):
            # fetch `step` group records per round
            ks = ["'%s'" % k for k in gkeys[i:i+step]]
            sql = "select gkey,sumdelay,history_mrk from stat_item_group where gkey in (%s)" % ",".join(ks)
            res, desc = db.query(sql)
            if res==0 and desc:
                for row in desc:
                    gk = ""
                    # sumdelay==2: hourly roll-up
                    if row['sumdelay'] == 2:
                        clock = utils.timestamp(clock,'h')
                        gk =row['gkey']
                    # sumdelay==1: daily roll-up, at hour 0
                    elif row['sumdelay'] == 1 and now.tm_hour==0:
                        clock = utils.timestamp(0,'d')
                        gk =row['gkey']
                    # sumdelay==3: monthly roll-up (1st of month, hour 0) --
                    # NOTE(review): `or` makes this fire every day at hour 0
                    # and all day on the 1st; confirm intended operator.
                    elif row['sumdelay'] == 3 and (now.tm_mday==1 or now.tm_hour==0) :
                        clock = utils.timestamp(0,'d')
                        gk =row['gkey']
                    # sumdelay==4: weekly roll-up -- NOTE(review): the `!=`/`or`
                    # combination looks inverted relative to "Monday, hour 0";
                    # confirm intent.
                    elif row['sumdelay'] == 4 and (now.tm_wday!=1 or now.tm_hour!=0) :
                        clock = utils.timestamp(0,'d')
                        gk =row['gkey']
                    #gk =row['gkey']
                    if gk:
                        #todo_gkeys.append(row)
                        #print(stat_base.RD_ITM_HST_TMP_PRFX+row['gkey'],RD_TMP_PRF+row['gkey'])
                        #rdb.rename(stat_base.RD_ITM_HST_TMP_PRFX+row['gkey'],RD_TMP_PRF+row['gkey'])
                        # grab the staging hash, then clear it
                        key = stat_base.RD_ITM_HST_TMP_PRFX+gk
                        r_item = rdb.hgetall(key)
                        rdb.delete(key)
                        tb = stat_base.get_hst_name(row['history_mrk'])
                        cnt = 0
                        if r_item:
                            logger.info("sum stat group %s from %s into %s" % (row['gkey'],key,tb))
                            ks = list(r_item.keys())
                            # insert 50 rows per statement
                            for i in range(0,len(ks),50):
                                value = ["('%s','%s','%s')" %(j,r_item[j],clock) for j in ks[i:i+50]]
                                ins_sql = "insert into %s(itemid,val,clock)values%s" % (tb,','.join(value))
                                res,desc = db.query(ins_sql,1)
                                if res == -1:
                                    logger.error(str(desc))
                                    data_loge.info(ins_sql)
                                else:
                                    cnt+=len(value)
                            logger.info("[%s] count:[%s] ins:[%s] rows." %(gk,len(ks),cnt))
    # (original comment: write the summarised data into the DB)
    # NOTE(review): the source is garbled from here on -- the triple quote
    # below is unterminated, and the `except` that follows has no visible
    # matching `try:`.  The function most likely lived inside a
    # `while 1: try: ... except ...: time.sleep(1)` daemon loop whose
    # structure was lost; reconstruct against the original file before use.
    """
    except Exception as e:
        logger.info(str(e))
    logger.info("sumdelay end.")
    time.sleep(1)

if __name__=="__main__":
    import json
    # load configuration files (whitespace stripped before json parsing)
    sconf.SYS = json.loads("".join(open('../conf/sys.json').read().split()))
    sconf.HOST = json.loads("".join(open('../conf/host.json').read().split()))
    sconf.DATA_SOURC = json.loads("".join(open('../conf/databases.json').read().split()))
    #biz_info = json.loads("".join(open('../conf/biz.json').read().split()))
    biz_info = json.loads(open('../conf/biz.json').read().replace('\n','').replace('\t',''))
    # initialise the database and redis pools
    mysqlwrap.setup_db('default',sconf.SYS['mysql'])
    mysqlwrap.pool_monitor()
    rediswrap.setup_redis('default',sconf.SYS['redis']['host'],sconf.SYS['redis']['port'])
    rdb = rediswrap.get_redis()
    # ad-hoc debugging snippets kept from development:
    #res = rdb.scan(0,match='stat*',count=10 )
    #res = rdb.keys('test*')
    #print(help(rdb.rename))
    #res = rdb.renamenx ('test:zset.1','test:rename.1')
    #res = rdb.hgetall('stat:items:Crawler_stat.json' )
    #res = rdb.hgetall('stat:hst:tmp:Crawler_stat.json' )
    #print(res)
    sumdelay()
def sphinx2redis_ex(cnfkey): """通过配置文件,获取统计数据 """ #取业务配置 bizcnf = get_cnf_val(cnfkey,sconf.BIZ) if not bizcnf: return sconf.err_handle.biznum_not_config rdb = rediswrap.get_redis('cache') dbinfo = get_host_by_data("base.mysql.biz72_product") dbinfo['dbname']="biz72_product" db = mysqlwrap.dbclass(dbinfo) db.connect() sql = "select tag_id from pro_tag where tag_id>100 and tag_id<200" res,desc = db.query(sql) print("rows:",len(desc)) source="base.sphinx.IDX_com_corp_dist" host_info = get_host_by_data(source) sp = sphinx(host_info['host'],host_info['port']) prama = { "querymod":"SPH_MATCH_EXTENDED2", "pageSize":1, "page":1, "intType":{ }, "index":"IDX_com_corp_dist" } itm1 = {} itm2 = {} itm3 = {} x=0 st = int(time.time()) for row in desc: x+=1 if x%100==0: print(x,time.time()-st) tag_len = len(str(row['tag_id'])) if tag_len == 3: f = "r_com" elif tag_len == 6: f = "c_com" elif tag_len == 9: f = "com" prama['intType'][f]=str(row['tag_id']) #prama['intType']["com"]='101105105' sp.initQuery(prama) rs = sp.RunQueries() if rs and rs[0]['status']==0: total_found = rs[0]['total_found'] print("tag_id",row['tag_id'],"total_found",total_found) if total_found >0: rdb.hset('stat2cach.tags_corp',row['tag_id'],total_found) if tag_len == 3: itm1[row['tag_id']]=total_found elif tag_len == 6: itm2[row['tag_id']]=total_found elif tag_len == 9: itm3[row['tag_id']]=total_found else: print(sp._error) print("time:",time.time()-st) print("itm1","len:",len(itm1),"sum:",sum(list(itm1.values()))) print("itm2","len:",len(itm2),"sum:",sum(list(itm2.values()))) print("itm3","len:",len(itm3),"sum:",sum(list(itm3.values())))