def outputa(lastMethod, paras, paraUs):
    """Print a summary per parameter of one method, then a merged summary.

    For each ``p`` in ``paras`` the mapping ``paraUs[p]`` is passed to
    ``dictInfo.info`` and printed as ``lastMethod + "-" + p``, a tab and the
    stringified result.  The entries of ``paraUs[p]`` are then added into a
    combined mapping and ``paraUs[p]`` is replaced by a new empty dict.
    After the loop the combined mapping is summarised and printed under
    ``lastMethod`` alone.

    Args:
        lastMethod: String used as the prefix of every printed line.
        paras: Iterable of string keys into ``paraUs``.
        paraUs: Mapping of key -> {item: amount}.  Mutated in place: every
            visited entry is reset to ``{}``.

    NOTE(review): the block structure was reconstructed from a
    whitespace-collapsed source line -- confirm the nesting.
    """
    merged = {}
    for para in paras:
        per_para = paraUs[para]
        print(lastMethod + "-" + para + "\t" + str(dictInfo.info(per_para)))
        for item, amount in per_para.items():
            if item in merged:
                merged[item] += amount
            else:
                merged[item] = amount
        # Hand the caller back an empty bucket for this parameter.
        paraUs[para] = {}
    print(lastMethod + "\t" + str(dictInfo.info(merged)))
def outputa(lastMethod, paras, paraUs):
    """Print a summary per parameter of one method, then a merged summary.

    For each ``p`` in ``paras`` the mapping ``paraUs[p]`` is passed to
    ``dictInfo.info`` and printed as ``lastMethod + "-" + p``, a tab and the
    stringified result.  The entries of ``paraUs[p]`` are then added into a
    combined mapping and ``paraUs[p]`` is replaced by a new empty dict.
    After the loop the combined mapping is summarised and printed under
    ``lastMethod`` alone.

    Args:
        lastMethod: String used as the prefix of every printed line.
        paras: Iterable of string keys into ``paraUs``.
        paraUs: Mapping of key -> {item: amount}.  Mutated in place: every
            visited entry is reset to ``{}``.

    NOTE(review): the block structure was reconstructed from a
    whitespace-collapsed source line -- confirm the nesting.
    """
    merged = {}
    for para in paras:
        per_para = paraUs[para]
        print(lastMethod + "-" + para + "\t" + str(dictInfo.info(per_para)))
        for item, amount in per_para.items():
            if item in merged:
                merged[item] += amount
            else:
                merged[item] = amount
        # Hand the caller back an empty bucket for this parameter.
        paraUs[para] = {}
    print(lastMethod + "\t" + str(dictInfo.info(merged)))
def _ratio(numerator, denominator, offset=0.0):
    """Return ``numerator / denominator + offset``, or 0.0 if the denominator is 0."""
    if not denominator:
        return 0.0
    return float(numerator) / denominator + offset


def count(f):
    """Print line/uuid statistics for registered and non-registered users.

    Each line of ``f`` is stripped and split on tabs; lines with fewer than
    ``APP_LOG_COLUMNS4`` columns are ignored.  A line counts as "registered"
    when ``PositiveNumP.match`` accepts the ``USER_CID`` column, otherwise
    its ``UUID_ID`` column is tallied as a non-registered uuid.

    Unlike a plain division, every ratio is reported as 0.0 when its
    denominator is zero (empty input, or input with only one kind of user),
    so the report never aborts with ZeroDivisionError.

    Args:
        f: Iterable of tab-separated text lines (e.g. an open file).

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source line.  The registered-line counter is incremented for every
    registered line (not only the first line of a user) because the printed
    labels describe it as a log-line count -- confirm.
    """
    line_total = 0
    registered_lines = 0
    registered = {}   # user id -> {uuid: line count}, later user id -> line count
    anonymous = {}    # uuid -> line count for non-registered lines
    all_uuids = {}    # uuid -> line count over every accepted line
    for line in f:
        cols = line.strip().split("\t")
        if len(cols) < APP_LOG_COLUMNS4:
            continue
        line_total += 1
        user = cols[USER_CID]
        device = cols[UUID_ID]
        all_uuids[device] = all_uuids.get(device, 0) + 1
        if PositiveNumP.match(user):
            registered_lines += 1
            per_uuid = registered.setdefault(user, {})
            per_uuid[device] = per_uuid.get(device, 0) + 1
        else:
            anonymous[device] = anonymous.get(device, 0) + 1

    # Collapse each user's {uuid: count} into a plain line count while
    # accumulating how many (user, uuid) pairs exist overall.
    registered_uuids = 0
    for user in registered:
        per_uuid = registered[user]
        registered_uuids += len(per_uuid)
        if len(per_uuid) >= 3:
            # Users seen with three or more uuids are listed individually.
            print("%s %s" % (user, per_uuid))
        registered[user] = sum(per_uuid.values())

    anonymous_lines = line_total - registered_lines
    combined_uuids = len(anonymous) + registered_uuids

    # Label: request-log line statistics for registered users.
    print("注册用户请求日志行数统计信息")
    print(dictInfo.info(registered))
    # Label: request-log line statistics for non-registered users.
    print("非注册用户请求日志行数统计信息")
    print(dictInfo.info(anonymous))
    # Columns: registered-user log lines, total log lines, registered share.
    print("注册用户日志行数\t总日志行数\t注册用户占比")
    print("%d\t%d\t%.4f" % (registered_lines, line_total,
                            _ratio(registered_lines, line_total)))
    # Columns: total uuids, registered + non-registered uuids, overlap ratio.
    print("总uuid数\t注册uuid+非注册uuid\t注册与非注册uuid重叠比例")
    print("%d\t%d\t%.4f" % (len(all_uuids), combined_uuids,
                            _ratio(combined_uuids, len(all_uuids), -1.0)))
    # Columns: registered users, registered uuids, multi-uuid ratio.
    print("注册用户数\t注册uuid数\t注册用户多个uuid比例")
    print("%d\t%d\t%.4f" % (len(registered), registered_uuids,
                            _ratio(registered_uuids, len(registered), -1.0)))
    # Columns: registered users, their log lines, average lines per user.
    print("注册用户数\t注册用户日志行数\t注册用户平均行数")
    print("%d\t%d\t%.4f" % (len(registered), registered_lines,
                            _ratio(registered_lines, len(registered))))
    # Columns: non-registered uuids, their log lines, average lines per uuid.
    print("非注册uuid数\t非注册用户日志行数\t非注册用户平均行数")
    print("%d\t%d\t%.4f" % (len(anonymous), anonymous_lines,
                            _ratio(anonymous_lines, len(anonymous))))
def methodDistribute():
    """Count how often each method value occurs on stdin and print the tally.

    Lines from ``sys.stdin`` are stripped and split on tabs; lines with fewer
    than ``APP_LOG_COLUMNS`` columns are skipped.  The ``METHOD_CID`` column
    is used as the counting key.  Each ``key count`` pair is printed on its
    own line, followed by the result of ``dictInfo.info`` for the whole
    tally.
    """
    tally = {}
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) >= APP_LOG_COLUMNS:
            name = fields[METHOD_CID]
            tally[name] = tally.get(name, 0) + 1
    for name, hits in tally.items():
        print("%s %s" % (name, hits))
    print(dictInfo.info(tally))
def methodDistribute():
    """Count how often each method value occurs on stdin and print the tally.

    Lines from ``sys.stdin`` are stripped and split on tabs; lines with fewer
    than ``APP_LOG_COLUMNS`` columns are skipped.  The ``METHOD_CID`` column
    is used as the counting key.  Each ``key count`` pair is printed on its
    own line, followed by the result of ``dictInfo.info`` for the whole
    tally.
    """
    tally = {}
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) >= APP_LOG_COLUMNS:
            name = fields[METHOD_CID]
            tally[name] = tally.get(name, 0) + 1
    for name, hits in tally.items():
        print("%s %s" % (name, hits))
    print(dictInfo.info(tally))
def ipDis():
    """Report how many distinct users share each IP address seen on stdin.

    Lines are stripped and split on tabs.  A line is skipped when it has
    fewer than ``APP_LOG_COLUMNS`` columns, when the raw line contains
    ``"mobiledevice.initandroiddevice"`` at an index greater than 0, when
    ``ActionUser(cols)`` yields ``None`` or ``""``, or when the ``IP_CID``
    column is empty.

    Per IP the first user is stored directly; once a second, different user
    appears the entry becomes a dict of users.  While an IP is in that
    multi-user state, a line whose user is already known but differs from
    the user of the previous line on that IP increments ``wrong`` and the
    per-IP counter.

    Printed, in order: ``dictInfo.info`` of the per-IP switch counters, the
    ``total`` and ``wrong`` counts, and ``dictInfo.info`` of the number of
    distinct users per IP.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source line; in particular the per-IP counter update is assumed to sit
    inside the "user differs from previous user" branch -- confirm.
    """
    users_by_ip = {}     # ip -> single user, or {user: 1} once several were seen
    previous_user = {}   # ip -> user of the most recent accepted line
    switches_by_ip = {}  # ip -> number of "wrong" events
    wrong = 0
    total = 0
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) < APP_LOG_COLUMNS:
            continue
        # find() > 0 deliberately kept: a match at index 0 is not skipped.
        if raw.find("mobiledevice.initandroiddevice") > 0:
            continue
        user = ActionUser(fields)
        if user is None or user == "":
            continue
        ip = fields[IP_CID]
        if ip == "":
            continue

        if ip in users_by_ip:
            seen = users_by_ip[ip]
            if type(seen) is dict:
                if user not in seen:
                    seen[user] = 1
                elif user != previous_user[ip]:
                    wrong += 1
                    switches_by_ip[ip] = switches_by_ip.get(ip, 0) + 1
            elif seen != user:
                # Second distinct user on this IP: switch to the dict form.
                users_by_ip[ip] = {}
                users_by_ip[ip][user] = 1
                users_by_ip[ip][seen] = 1
        else:
            users_by_ip[ip] = user
        previous_user[ip] = user
        total += 1

    # Replace every entry with its number of distinct users.
    for ip in users_by_ip:
        seen = users_by_ip[ip]
        users_by_ip[ip] = len(seen) if type(seen) is dict else 1

    print(dictInfo.info(switches_by_ip))
    print("total %s" % total)
    print("wrong %s" % wrong)
    print(dictInfo.info(users_by_ip))
def ipDis():
    """Report how many distinct users share each IP address seen on stdin.

    Lines are stripped and split on tabs.  A line is skipped when it has
    fewer than ``APP_LOG_COLUMNS`` columns, when the raw line contains
    ``"mobiledevice.initandroiddevice"`` at an index greater than 0, when
    ``ActionUser(cols)`` yields ``None`` or ``""``, or when the ``IP_CID``
    column is empty.

    Per IP the first user is stored directly; once a second, different user
    appears the entry becomes a dict of users.  While an IP is in that
    multi-user state, a line whose user is already known but differs from
    the user of the previous line on that IP increments ``wrong`` and the
    per-IP counter.

    Printed, in order: ``dictInfo.info`` of the per-IP switch counters, the
    ``total`` and ``wrong`` counts, and ``dictInfo.info`` of the number of
    distinct users per IP.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source line; in particular the per-IP counter update is assumed to sit
    inside the "user differs from previous user" branch -- confirm.
    """
    users_by_ip = {}     # ip -> single user, or {user: 1} once several were seen
    previous_user = {}   # ip -> user of the most recent accepted line
    switches_by_ip = {}  # ip -> number of "wrong" events
    wrong = 0
    total = 0
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) < APP_LOG_COLUMNS:
            continue
        # find() > 0 deliberately kept: a match at index 0 is not skipped.
        if raw.find("mobiledevice.initandroiddevice") > 0:
            continue
        user = ActionUser(fields)
        if user is None or user == "":
            continue
        ip = fields[IP_CID]
        if ip == "":
            continue

        if ip in users_by_ip:
            seen = users_by_ip[ip]
            if type(seen) is dict:
                if user not in seen:
                    seen[user] = 1
                elif user != previous_user[ip]:
                    wrong += 1
                    switches_by_ip[ip] = switches_by_ip.get(ip, 0) + 1
            elif seen != user:
                # Second distinct user on this IP: switch to the dict form.
                users_by_ip[ip] = {}
                users_by_ip[ip][user] = 1
                users_by_ip[ip][seen] = 1
        else:
            users_by_ip[ip] = user
        previous_user[ip] = user
        total += 1

    # Replace every entry with its number of distinct users.
    for ip in users_by_ip:
        seen = users_by_ip[ip]
        users_by_ip[ip] = len(seen) if type(seen) is dict else 1

    print(dictInfo.info(switches_by_ip))
    print("total %s" % total)
    print("wrong %s" % wrong)
    print(dictInfo.info(users_by_ip))
def join():
    """Group stdin records by user, hand each group to ``output``, print totals.

    Each stdin line is stripped and split on tabs; lines with fewer than five
    columns are skipped.  Column 0 is the user, column 1 the method, column 2
    the message, column 3 an integer time and column 4 a value whose leading
    ``"4"`` causes the record to be ignored.  Records are collected as
    ``{message: {method: time}}`` until the user column changes, at which
    point ``output(user, records, ds, divs)`` is called and a fresh
    collection is started.  The last user is flushed after the loop.

    After the input is exhausted the counters in ``ds`` are printed, followed
    by ``dictInfo.info(divs)`` and ``ccinfo.info(divs)``.

    NOTE(review): grouping relies on consecutive lines of a user being
    adjacent -- presumably the input is sorted by user; confirm.
    NOTE(review): ``output`` is called with four arguments here; verify it
    against the ``output`` definition this module actually uses.
    NOTE(review): nesting reconstructed from a whitespace-collapsed line.
    """
    ds = {
        "divTime": 0,
        "divNum": 0,
        "wrongTime": 0,
        "wrongNum": 0,
        "not": 0,
        "viewNoReceive": 0,
    }
    divs = {}
    current_user = ""
    records = {}
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) < 5:
            continue
        user = fields[0]
        if current_user == "":
            current_user = user
        if current_user != user:
            # User changed: flush the finished group and start a new one.
            output(current_user, records, ds, divs)
            current_user = user
            records = {}
        if fields[4].startswith("4"):
            continue
        method = fields[1]
        message = fields[2]
        stamp = int(fields[3])
        records.setdefault(message, {})[method] = stamp
    if current_user != "":
        output(current_user, records, ds, divs)

    wrong_time = ds["wrongTime"]
    wrong_num = ds["wrongNum"]
    div_time = ds["divTime"]
    div_num = ds["divNum"]
    print("view but not received:%d\n" % (ds["viewNoReceive"]))
    print("not:%d\n" % (ds["not"]))
    # The 1e-16 term keeps the averages defined when a counter is zero.
    print("wrong:%d\t%d\t%f\n" % (wrong_time, wrong_num,
                                  float(wrong_time) / (wrong_num + 1e-16)))
    print("div:%d\t%d\t%f\n" % (div_time, div_num,
                                float(div_time) / (div_num + 1e-16)))
    print(dictInfo.info(divs))
    print(ccinfo.info(divs))
def join():
    """Group stdin records by user, hand each group to ``output``, print totals.

    Each stdin line is stripped and split on tabs; lines with fewer than five
    columns are skipped.  Column 0 is the user, column 1 the method, column 2
    the message, column 3 an integer time and column 4 a value whose leading
    ``"4"`` causes the record to be ignored.  Records are collected as
    ``{message: {method: time}}`` until the user column changes, at which
    point ``output(user, records, ds, divs)`` is called and a fresh
    collection is started.  The last user is flushed after the loop.

    After the input is exhausted the counters in ``ds`` are printed, followed
    by ``dictInfo.info(divs)`` and ``ccinfo.info(divs)``.

    NOTE(review): grouping relies on consecutive lines of a user being
    adjacent -- presumably the input is sorted by user; confirm.
    NOTE(review): ``output`` is called with four arguments here; verify it
    against the ``output`` definition this module actually uses.
    NOTE(review): nesting reconstructed from a whitespace-collapsed line.
    """
    ds = {
        "divTime": 0,
        "divNum": 0,
        "wrongTime": 0,
        "wrongNum": 0,
        "not": 0,
        "viewNoReceive": 0,
    }
    divs = {}
    current_user = ""
    records = {}
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) < 5:
            continue
        user = fields[0]
        if current_user == "":
            current_user = user
        if current_user != user:
            # User changed: flush the finished group and start a new one.
            output(current_user, records, ds, divs)
            current_user = user
            records = {}
        if fields[4].startswith("4"):
            continue
        method = fields[1]
        message = fields[2]
        stamp = int(fields[3])
        records.setdefault(message, {})[method] = stamp
    if current_user != "":
        output(current_user, records, ds, divs)

    wrong_time = ds["wrongTime"]
    wrong_num = ds["wrongNum"]
    div_time = ds["divTime"]
    div_num = ds["divNum"]
    print("view but not received:%d\n" % (ds["viewNoReceive"]))
    print("not:%d\n" % (ds["not"]))
    # The 1e-16 term keeps the averages defined when a counter is zero.
    print("wrong:%d\t%d\t%f\n" % (wrong_time, wrong_num,
                                  float(wrong_time) / (wrong_num + 1e-16)))
    print("div:%d\t%d\t%f\n" % (div_time, div_num,
                                float(div_time) / (div_num + 1e-16)))
    print(dictInfo.info(divs))
    print(ccinfo.info(divs))
def appleUserId():
    """Compare the id returned by ``uuid(cols)`` with the user column for
    ``appstore`` records read from stdin and print the resulting tallies.

    Lines are stripped and split on tabs.  A line is skipped when it has at
    most ``APP_LOG_COLUMNS`` columns, when its ``UUID_ID`` column is one of
    the placeholder device ids listed below, when its ``MEDIA_CID`` column
    is not ``"appstore"``, or when ``uuid(cols)`` returns ``None``.

    When ``PositiveNumP`` matches the id from ``uuid(cols)``:
      * ``right`` is incremented (and the raw line printed) if the
        ``USER_CID`` column matches ``PositiveNumP`` as well, otherwise
        ``wrong`` is incremented;
      * the line's IP is recorded for the device in the first table.
    Otherwise the IP is recorded for the device in the second table.

    Finally both tables are reduced to "number of distinct IPs per device"
    and printed via ``dictInfo.info`` (second table first), followed by the
    ``right`` and ``wrong`` counts.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source line and is ambiguous -- in particular which ``if`` the ``else``
    that fills the second table belongs to.  Confirm against the original
    file before relying on this layout.
    """
    placeholder_ids = ("haodou", "", "null", "haodoua1000033d709ec6")
    right = 0
    wrong = 0
    numeric_id_ips = {}   # device id -> {ip: 1} when uuid(cols) is numeric
    other_id_ips = {}     # device id -> {ip: 1} otherwise
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) <= APP_LOG_COLUMNS:
            continue
        did = fields[UUID_ID]
        if did in placeholder_ids:
            continue
        ip = fields[IP_CID]
        if fields[MEDIA_CID] != "appstore":
            continue
        uid = uuid(fields)
        if uid is None:
            continue
        if PositiveNumP.match(uid):
            if PositiveNumP.match(fields[USER_CID]):
                right += 1
                print(raw)
            else:
                wrong += 1
            numeric_id_ips.setdefault(did, {}).setdefault(ip, 1)
        else:
            other_id_ips.setdefault(did, {}).setdefault(ip, 1)

    # Collapse each {ip: 1} set into its size.
    for did in numeric_id_ips:
        numeric_id_ips[did] = len(numeric_id_ips[did])
    for did in other_id_ips:
        other_id_ips[did] = len(other_id_ips[did])

    import dictInfo
    print(dictInfo.info(other_id_ips))
    print(dictInfo.info(numeric_id_ips))
    print("right %s" % right)
    print("wrong %s" % wrong)
def appleUserId():
    """Compare the id returned by ``uuid(cols)`` with the user column for
    ``appstore`` records read from stdin and print the resulting tallies.

    Lines are stripped and split on tabs.  A line is skipped when it has at
    most ``APP_LOG_COLUMNS`` columns, when its ``UUID_ID`` column is one of
    the placeholder device ids listed below, when its ``MEDIA_CID`` column
    is not ``"appstore"``, or when ``uuid(cols)`` returns ``None``.

    When ``PositiveNumP`` matches the id from ``uuid(cols)``:
      * ``right`` is incremented (and the raw line printed) if the
        ``USER_CID`` column matches ``PositiveNumP`` as well, otherwise
        ``wrong`` is incremented;
      * the line's IP is recorded for the device in the first table.
    Otherwise the IP is recorded for the device in the second table.

    Finally both tables are reduced to "number of distinct IPs per device"
    and printed via ``dictInfo.info`` (second table first), followed by the
    ``right`` and ``wrong`` counts.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source line and is ambiguous -- in particular which ``if`` the ``else``
    that fills the second table belongs to.  Confirm against the original
    file before relying on this layout.
    """
    placeholder_ids = ("haodou", "", "null", "haodoua1000033d709ec6")
    right = 0
    wrong = 0
    numeric_id_ips = {}   # device id -> {ip: 1} when uuid(cols) is numeric
    other_id_ips = {}     # device id -> {ip: 1} otherwise
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) <= APP_LOG_COLUMNS:
            continue
        did = fields[UUID_ID]
        if did in placeholder_ids:
            continue
        ip = fields[IP_CID]
        if fields[MEDIA_CID] != "appstore":
            continue
        uid = uuid(fields)
        if uid is None:
            continue
        if PositiveNumP.match(uid):
            if PositiveNumP.match(fields[USER_CID]):
                right += 1
                print(raw)
            else:
                wrong += 1
            numeric_id_ips.setdefault(did, {}).setdefault(ip, 1)
        else:
            other_id_ips.setdefault(did, {}).setdefault(ip, 1)

    # Collapse each {ip: 1} set into its size.
    for did in numeric_id_ips:
        numeric_id_ips[did] = len(numeric_id_ips[did])
    for did in other_id_ips:
        other_id_ips[did] = len(other_id_ips[did])

    import dictInfo
    print(dictInfo.info(other_id_ips))
    print(dictInfo.info(numeric_id_ips))
    print("right %s" % right)
    print("wrong %s" % wrong)
def tagidDis():
    """Tally ``tagid`` / ``keyword`` usage of ``search.getlist`` calls on stdin.

    Lines are stripped and split on tabs.  Only lines with at least
    ``METHOD_CID + 1`` columns, a ``VERSION_CID`` column equal to ``"4.0"``
    or ``"v4.0"`` and a ``METHOD_CID`` column starting with
    ``"search.getlist"`` are considered.  ``tagid``, ``keyword`` and
    ``return_request_id`` are extracted from the ``PARA_ID`` column with
    ``getValue``.

    Counters maintained:
      * ``knt``  keyword -> count, when tagid is the string ``"null"`` and a
        keyword is present;
      * ``ks``   keyword -> count, when tagid is ``""`` or ``None`` (the
        keyword may itself be ``None``/empty);
      * ``kt``   tagid -> count, when a real tagid and a keyword are present;
      * ``rkt``  tagid -> count, as ``kt`` but only with a non-empty
        ``return_request_id``;
      * ``t``    tagid -> count, when a real tagid comes without a keyword;
      * ``names`` tagid -> first keyword seen together with it.

    The sizes of ``rkt``/``kt``/``t`` and their entries are printed, then
    ``dictInfo.info`` of ``knt`` and ``ks`` together with the pairs found
    under the ``"top10"`` key of each result.

    NOTE(review): nesting reconstructed from a whitespace-collapsed source
    line; the placement of the ``kt``/``names`` updates (per keyword, not
    per request id) is inferred from the later ``names[tid]`` lookup for
    every ``kt`` key -- confirm.
    """
    rkt = {}
    kt = {}
    t = {}
    knt = {}
    ks = {}
    names = {}
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) < METHOD_CID + 1:
            continue
        version = fields[VERSION_CID]
        if version != "4.0" and version != "v4.0":
            continue
        if not fields[METHOD_CID].startswith("search.getlist"):
            continue
        para = fields[PARA_ID]
        tid = getValue(para, "tagid")
        kw = getValue(para, "keyword")
        rqid = getValue(para, "return_request_id")
        has_keyword = kw is not None and kw != ""

        if tid == "null":
            if has_keyword:
                knt[kw] = knt.get(kw, 0) + 1
        elif tid == "" or tid is None:
            ks[kw] = ks.get(kw, 0) + 1
        else:
            # A real tag id (neither None, "" nor the string "null").
            if has_keyword:
                if rqid is not None and rqid != "":
                    rkt[tid] = rkt.get(tid, 0) + 1
                kt[tid] = kt.get(tid, 0) + 1
                names.setdefault(tid, kw)
            else:
                t[tid] = t.get(tid, 0) + 1

    print("len of rkt %s" % len(rkt))
    print("len of kt %s" % len(kt))
    print("len of t %s" % len(t))
    for tid in rkt:
        names.setdefault(tid, "")
        print("%s %s %s" % (tid, names[tid], rkt[tid]))
    for tid in t:
        if tid in names:
            print("%s %s %s" % (tid, names[tid], t[tid]))
        else:
            print("%s %s" % (tid, t[tid]))
    for tid in kt:
        print("kt %s %s %s" % (tid, names[tid], kt[tid]))
    for keyword_counts in (knt, ks):
        info = dictInfo.info(keyword_counts)
        print(info)
        for pair in info["top10"]:
            print("%s %s" % (pair[0], pair[1]))
import dictInfo

if __name__ == "__main__":
    # Manual smoke run: feed a small dict with mixed int/str keys to the two
    # dictInfo helpers and print whatever they return.
    sample = {
        2: 3,
        "ty": 4,
        10: 4,
        12: 4,
        99: 100,
    }
    print(dictInfo.info(sample))
    print(dictInfo.topn(sample, 3))
def tagidDis():
    """Tally ``tagid`` / ``keyword`` usage of ``search.getlist`` calls on stdin.

    Lines are stripped and split on tabs.  Only lines with at least
    ``METHOD_CID + 1`` columns, a ``VERSION_CID`` column equal to ``"4.0"``
    or ``"v4.0"`` and a ``METHOD_CID`` column starting with
    ``"search.getlist"`` are considered.  ``tagid``, ``keyword`` and
    ``return_request_id`` are extracted from the ``PARA_ID`` column with
    ``getValue``.

    Counters maintained:
      * ``knt``  keyword -> count, when tagid is the string ``"null"`` and a
        keyword is present;
      * ``ks``   keyword -> count, when tagid is ``""`` or ``None`` (the
        keyword may itself be ``None``/empty);
      * ``kt``   tagid -> count, when a real tagid and a keyword are present;
      * ``rkt``  tagid -> count, as ``kt`` but only with a non-empty
        ``return_request_id``;
      * ``t``    tagid -> count, when a real tagid comes without a keyword;
      * ``names`` tagid -> first keyword seen together with it.

    The sizes of ``rkt``/``kt``/``t`` and their entries are printed, then
    ``dictInfo.info`` of ``knt`` and ``ks`` together with the pairs found
    under the ``"top10"`` key of each result.

    NOTE(review): nesting reconstructed from a whitespace-collapsed source
    line; the placement of the ``kt``/``names`` updates (per keyword, not
    per request id) is inferred from the later ``names[tid]`` lookup for
    every ``kt`` key -- confirm.
    """
    rkt = {}
    kt = {}
    t = {}
    knt = {}
    ks = {}
    names = {}
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) < METHOD_CID + 1:
            continue
        version = fields[VERSION_CID]
        if version != "4.0" and version != "v4.0":
            continue
        if not fields[METHOD_CID].startswith("search.getlist"):
            continue
        para = fields[PARA_ID]
        tid = getValue(para, "tagid")
        kw = getValue(para, "keyword")
        rqid = getValue(para, "return_request_id")
        has_keyword = kw is not None and kw != ""

        if tid == "null":
            if has_keyword:
                knt[kw] = knt.get(kw, 0) + 1
        elif tid == "" or tid is None:
            ks[kw] = ks.get(kw, 0) + 1
        else:
            # A real tag id (neither None, "" nor the string "null").
            if has_keyword:
                if rqid is not None and rqid != "":
                    rkt[tid] = rkt.get(tid, 0) + 1
                kt[tid] = kt.get(tid, 0) + 1
                names.setdefault(tid, kw)
            else:
                t[tid] = t.get(tid, 0) + 1

    print("len of rkt %s" % len(rkt))
    print("len of kt %s" % len(kt))
    print("len of t %s" % len(t))
    for tid in rkt:
        names.setdefault(tid, "")
        print("%s %s %s" % (tid, names[tid], rkt[tid]))
    for tid in t:
        if tid in names:
            print("%s %s %s" % (tid, names[tid], t[tid]))
        else:
            print("%s %s" % (tid, t[tid]))
    for tid in kt:
        print("kt %s %s %s" % (tid, names[tid], kt[tid]))
    for keyword_counts in (knt, ks):
        info = dictInfo.info(keyword_counts)
        print(info)
        for pair in info["top10"]:
            print("%s %s" % (pair[0], pair[1]))
import dictInfo

if __name__ == "__main__":
    # Manual smoke run: feed a small dict with mixed int/str keys to the two
    # dictInfo helpers and print whatever they return.
    sample = {
        2: 3,
        "ty": 4,
        10: 4,
        12: 4,
        99: 100,
    }
    print(dictInfo.info(sample))
    print(dictInfo.topn(sample, 3))
def output(lastm, us, vs):
    """Print the ``dictInfo.info`` summaries of two mappings for one label.

    Two lines are printed: ``lastm``, a tab and the stringified summary of
    ``us``; then the same for ``vs`` with the label prefixed by ``"v_"``.

    Args:
        lastm: Label string placed at the start of each line.
        us: Mapping summarised on the first line.
        vs: Mapping summarised on the second (``v_``-prefixed) line.
    """
    for prefix, counts in (("", us), ("v_", vs)):
        summary = str(dictInfo.info(counts))
        print(prefix + lastm + "\t" + summary)
def output(lastm, us, vs):
    """Print the ``dictInfo.info`` summaries of two mappings for one label.

    Two lines are printed: ``lastm``, a tab and the stringified summary of
    ``us``; then the same for ``vs`` with the label prefixed by ``"v_"``.

    Args:
        lastm: Label string placed at the start of each line.
        us: Mapping summarised on the first line.
        vs: Mapping summarised on the second (``v_``-prefixed) line.
    """
    for prefix, counts in (("", us), ("v_", vs)):
        summary = str(dictInfo.info(counts))
        print(prefix + lastm + "\t" + summary)