def sonFavRate():
    """Print a tab-separated recipe click / favourite report per seqHit file.

    Every entry of the seqHit data directory whose name starts with "20" is
    read line by line; lines starting with the "版本" header marker are
    skipped.  For each record returned by ``seqHit.readLine`` the counts in
    ``sret.sonTypes`` whose key is in ``recipeInfo`` are summed (recipe
    clicks), and the counts under ``recipeFavMethod`` in ``sret.gsonTypes``
    give the favourite total.  One row is printed per record, then one
    "all_version" row per column type for that file.

    NOTE(review): a record is reported only when it has recipe clicks AND
    contains ``recipeFavMethod``; records without favourite data are left
    out of the totals -- confirm this is the intended behaviour.
    """
    data_dir = "/home/zhangzhonghui/data/seqHit/"
    print("日期\t版本\t栏目名\t栏目进入数\t列表页菜谱点击数\t菜谱收藏数\t菜谱点击率\t菜谱收藏率\t栏目菜谱收藏率")
    for name in sorted(os.listdir(data_dir)):
        if not name.startswith("20"):
            continue
        day = name[name.rfind("/") + 1:]
        # stype -> [entries, recipe clicks, favourites], summed over versions
        totals = {}
        # "with" closes each daily file as soon as it has been consumed.
        with open(data_dir + name) as handle:
            for line in handle:
                if line.startswith("版本"):
                    continue
                (v, stype, sret) = seqHit.readLine(line)
                selfNum = 0
                for st in sret.sonTypes:
                    if st in recipeInfo:
                        selfNum += DictUtil.sum(sret.sonTypes[st])
                if selfNum <= 0 or recipeFavMethod not in sret.gsonTypes:
                    continue
                favNum = DictUtil.sum(sret.gsonTypes[recipeFavMethod])
                acc = totals.setdefault(stype, [0, 0, 0])
                acc[0] += sret.num
                acc[1] += selfNum
                acc[2] += favNum
                # favStr presumably renders the three counts and their
                # ratios as tab-prefixed columns -- defined elsewhere.
                print(day + "\t" + v + "\t" + stype
                      + favStr(sret.num, selfNum, favNum))
        for stype in totals:
            (num, hnum, favNum) = totals[stype]
            print(day + "\tall_version\t" + stype + favStr(num, hnum, favNum))
def sonFavRate():
    """Print a tab-separated recipe click / favourite report per seqHit file.

    Every entry of the seqHit data directory whose name starts with "20" is
    read line by line; lines starting with the "版本" header marker are
    skipped.  For each record returned by ``seqHit.readLine`` the counts in
    ``sret.sonTypes`` whose key is in ``recipeInfo`` are summed (recipe
    clicks), and the counts under ``recipeFavMethod`` in ``sret.gsonTypes``
    give the favourite total.  One row is printed per record, then one
    "all_version" row per column type for that file.

    NOTE(review): a record is reported only when it has recipe clicks AND
    contains ``recipeFavMethod``; records without favourite data are left
    out of the totals -- confirm this is the intended behaviour.
    """
    data_dir = "/home/zhangzhonghui/data/seqHit/"
    print("日期\t版本\t栏目名\t栏目进入数\t列表页菜谱点击数\t菜谱收藏数\t菜谱点击率\t菜谱收藏率\t栏目菜谱收藏率")
    for name in sorted(os.listdir(data_dir)):
        if not name.startswith("20"):
            continue
        day = name[name.rfind("/") + 1:]
        # stype -> [entries, recipe clicks, favourites], summed over versions
        totals = {}
        # "with" closes each daily file as soon as it has been consumed.
        with open(data_dir + name) as handle:
            for line in handle:
                if line.startswith("版本"):
                    continue
                (v, stype, sret) = seqHit.readLine(line)
                selfNum = 0
                for st in sret.sonTypes:
                    if st in recipeInfo:
                        selfNum += DictUtil.sum(sret.sonTypes[st])
                if selfNum <= 0 or recipeFavMethod not in sret.gsonTypes:
                    continue
                favNum = DictUtil.sum(sret.gsonTypes[recipeFavMethod])
                acc = totals.setdefault(stype, [0, 0, 0])
                acc[0] += sret.num
                acc[1] += selfNum
                acc[2] += favNum
                # favStr presumably renders the three counts and their
                # ratios as tab-prefixed columns -- defined elsewhere.
                print(day + "\t" + v + "\t" + stype
                      + favStr(sret.num, selfNum, favNum))
        for stype in totals:
            (num, hnum, favNum) = totals[stype]
            print(day + "\tall_version\t" + stype + favStr(num, hnum, favNum))
def reportSonsFile(method, version, file, ts, dts, v2=100000): if True: for line in open(file): if line.startswith("版本"): continue if True: try: (v, stype, sret) = seqHit.readLine(line) except: sys.stderr.write(line) continue tnum = sret.num + 1e-32 if tnum < 1.0: continue try: v = int(v) except: v = 0 if v >= version and v <= v2 and stype.startswith(method): if len(dts) <= 0 or dts[-1][0] != file: dts.append((file, {})) for type in sret.sonTypes: rate = DictUtil.sum(sret.sonTypes[type]) type = hitItemName.getTypeName(type) if type not in dts[-1][1]: ts[type] = 1 dts[-1][1][type] = 0 dts[-1][1][type] += rate '''
def ds(uuidFile, today):
    """Print, per regUser flag value, a histogram of day differences.

    Args:
        uuidFile: path of a whitespace-separated file; column 0 is a uuid
            and column 2 a day string.  Lines with fewer than 4 columns
            are skipped.
        today: suffix of the regUser file name; also passed as the second
            argument to ``TimeUtil.daysDiv``.

    The regUser file supplies ``uuid flag`` pairs (columns 0 and 1).  For
    each of those uuids the smallest day string found in ``uuidFile`` is
    kept (plain string comparison).  The uuids are then bucketed by flag
    and by ``TimeUtil.daysDiv(day, today)``, and for every flag the total
    and the bucket dict are printed.

    NOTE(review): uuids that never occur in ``uuidFile`` keep an empty day
    string, which is handed to ``TimeUtil.daysDiv`` unchanged -- confirm
    that helper copes with it.
    """
    regUserFile = "/home/zhangzhonghui/data/reg/regUser." + today
    # uuid -> [flag, earliest day seen ("" until one is found)]
    uuids = {}
    with open(regUserFile) as reg:
        for line in reg:
            cols = line.strip().split()
            if len(cols) < 2:
                # Blank or truncated line: nothing to register.
                continue
            uuids[cols[0]] = [cols[1], ""]
    with open(uuidFile) as seen:
        for line in seen:
            cols = line.split()
            if len(cols) < 4:
                continue
            entry = uuids.get(cols[0])
            if entry is None:
                continue
            day = cols[2]
            if entry[1] == "" or entry[1] > day:
                entry[1] = day
    histogram = {}
    for OK, firstDay in uuids.values():
        div = TimeUtil.daysDiv(firstDay, today)
        bucket = histogram.setdefault(OK, {})
        bucket[div] = bucket.get(div, 0) + 1
    for OK in histogram:
        print("%s %s" % (OK, DictUtil.sum(histogram[OK])))
        print(histogram[OK])
def reportSonsFile(method,version,file,ts,dts,v2=100000): if True: for line in open(file): if line.startswith("版本"): continue if True: try: (v,stype,sret)=seqHit.readLine(line) except: sys.stderr.write(line) continue tnum=sret.num+1e-32 if tnum < 1.0: continue try: v=int(v) except: v=0 if v >= version and v <= v2 and stype.startswith(method): if len(dts) <= 0 or dts[-1][0] != file: dts.append((file,{})) for type in sret.sonTypes: rate=DictUtil.sum(sret.sonTypes[type]) type=hitItemName.getTypeName(type) if type not in dts[-1][1]: ts[type]=1 dts[-1][1][type]=0 dts[-1][1][type]+=rate '''
def ds(uuidFile, today):
    """Print, per regUser flag value, a histogram of day differences.

    Args:
        uuidFile: path of a whitespace-separated file; column 0 is a uuid
            and column 2 a day string.  Lines with fewer than 4 columns
            are skipped.
        today: suffix of the regUser file name; also passed as the second
            argument to ``TimeUtil.daysDiv``.

    The regUser file supplies ``uuid flag`` pairs (columns 0 and 1).  For
    each of those uuids the smallest day string found in ``uuidFile`` is
    kept (plain string comparison).  The uuids are then bucketed by flag
    and by ``TimeUtil.daysDiv(day, today)``, and for every flag the total
    and the bucket dict are printed.

    NOTE(review): uuids that never occur in ``uuidFile`` keep an empty day
    string, which is handed to ``TimeUtil.daysDiv`` unchanged -- confirm
    that helper copes with it.
    """
    regUserFile = "/home/zhangzhonghui/data/reg/regUser." + today
    # uuid -> [flag, earliest day seen ("" until one is found)]
    uuids = {}
    with open(regUserFile) as reg:
        for line in reg:
            cols = line.strip().split()
            if len(cols) < 2:
                # Blank or truncated line: nothing to register.
                continue
            uuids[cols[0]] = [cols[1], ""]
    with open(uuidFile) as seen:
        for line in seen:
            cols = line.split()
            if len(cols) < 4:
                continue
            entry = uuids.get(cols[0])
            if entry is None:
                continue
            day = cols[2]
            if entry[1] == "" or entry[1] > day:
                entry[1] = day
    histogram = {}
    for OK, firstDay in uuids.values():
        div = TimeUtil.daysDiv(firstDay, today)
        bucket = histogram.setdefault(OK, {})
        bucket[div] = bucket.get(div, 0) + 1
    for OK in histogram:
        print("%s %s" % (OK, DictUtil.sum(histogram[OK])))
        print(histogram[OK])
def count():
    """Print per-file search/click statistics for tagged keywords vs. totals.

    The tag list is read from a fixed file; each tag is accepted both as-is
    and with a "v45_" prefix.  The directory given as ``sys.argv[1]`` is
    scanned for files whose name starts with "searchKeyword45.2014-".  In
    each file, rows whose key ends with "##total##" are merged into the
    overall accumulator, rows whose key is a known tag into the tagged
    accumulator, and rows whose key starts with ``searchKeyword45.CardFix``
    are ignored.  Rows that ``clickCount.readClick`` cannot parse are
    echoed to stderr and skipped.

    One tab-separated row is printed per file: the last 10 characters of
    the file name, overall searches, overall hits, tagged searches, tagged
    hits, tagged searches with a hit, then tagged hits-per-search, tagged
    hit rate, overall hit rate, and the tagged/overall ratios of the two.

    Raises:
        ZeroDivisionError: if the overall search count, overall hits per
            search, or overall hit rate is zero (for example when a file
            has no "##total##" row, assuming a fresh ``Click`` starts at
            zero -- TODO confirm).
    """
    tagFile = "/home/zhangzhonghui/data/backup.1203/tagQuery.all.txt"
    tags = set()
    with open(tagFile) as handle:
        for line in handle:
            tag = line.strip()
            tags.add(tag)
            tags.add("v45_" + tag)

    log_dir = sys.argv[1]
    for file in sorted(os.listdir(log_dir)):
        if not file.startswith("searchKeyword45.2014-"):
            continue
        tck = clickCount.Click()  # tagged keywords only
        ack = clickCount.Click()  # "##total##" rows
        with open(log_dir + "/" + file) as handle:
            for line in handle:
                cols = line.strip().split("\t")
                k = cols[0]
                if k.startswith(searchKeyword45.CardFix):
                    continue
                if k.endswith("##total##"):
                    (k, click) = clickCount.readClick(cols)
                    ack.merge(click)
                    continue
                if k not in tags:
                    continue
                try:
                    (k, click) = clickCount.readClick(cols)
                except Exception:
                    sys.stderr.write(line)
                    continue
                tck.merge(click)

        hit = clickCount.dictSum(tck.hit)
        voidHit = tck.hitCount[0] if 0 in tck.hitCount else 0
        # Guarded the same way as the tagged accumulator above.
        allVoidHit = ack.hitCount[0] if 0 in ack.hitCount else 0
        if tck.search == 0:
            # Avoid dividing by zero when no tagged keyword was searched.
            tck.search += 1e-3
        tn = float(hit) / tck.search
        an = float(ack.hit[clickCount.SumMark]) / ack.search
        trate = float(tck.search - voidHit) / tck.search
        arate = float(ack.search - allVoidHit) / ack.search
        print("%s\t%d\t%d\t%d\t%d\t%d\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f" % (
            file[-10:], ack.search, ack.hit[clickCount.SumMark], tck.search,
            hit, (tck.search - voidHit), tn, trate, arate,
            tn / an, trate / arate))
def topnRateStr(d, n=500):
    """Format the ``n`` largest entries of ``d`` as "[(key,count,share)...]".

    Entries are ordered by value, largest first.  ``share`` is the value
    divided by ``DictUtil.sum(d)`` and printed with four decimals; a zero
    total is replaced by a tiny epsilon so the division cannot fail.
    """
    ranked = sorted(d.items(), key=lambda item: item[1], reverse=True)
    total = DictUtil.sum(d)
    if total == 0:
        total += 1e-32
    pieces = []
    for rank, (key, cnt) in enumerate(ranked, 1):
        share = float(cnt) / total
        if rank > n:
            break
        pieces.append("(" + str(key) + ",%d,%.4f" % (cnt, share) + ")")
    return "[" + "".join(pieces) + "]"
def topnRateStr(d, n=500):
    """Format the ``n`` largest entries of ``d`` as "[(key,count,share)...]".

    Entries are ordered by value, largest first.  ``share`` is the value
    divided by ``DictUtil.sum(d)`` and printed with four decimals; a zero
    total is replaced by a tiny epsilon so the division cannot fail.
    """
    ranked = sorted(d.items(), key=lambda item: item[1], reverse=True)
    total = DictUtil.sum(d)
    if total == 0:
        total += 1e-32
    pieces = []
    for rank, (key, cnt) in enumerate(ranked, 1):
        share = float(cnt) / total
        if rank > n:
            break
        pieces.append("(" + str(key) + ",%d,%.4f" % (cnt, share) + ")")
    return "[" + "".join(pieces) + "]"
def favRate():
    """Print the favourite rate of recipe-info records for every seqHit file.

    Every entry of the seqHit data directory whose name starts with "20" is
    read line by line; lines starting with the "版本" header marker are
    skipped.  For each record whose ``stype`` is in ``recipeInfo`` and
    whose ``sret.sonTypes`` contains ``recipeFavMethod``, one tab-separated
    row is printed: file name, version, type, ``sret.num``, the summed
    favourite count, and favourites divided by ``sret.num``.

    NOTE(review): records without ``recipeFavMethod`` produce no row --
    confirm this is the intended behaviour.
    """
    data_dir = "/home/zhangzhonghui/data/seqHit/"
    for name in sorted(os.listdir(data_dir)):
        if not name.startswith("20"):
            continue
        # "with" closes each daily file as soon as it has been consumed.
        with open(data_dir + name) as handle:
            for line in handle:
                if line.startswith("版本"):
                    continue
                (v, stype, sret) = seqHit.readLine(line)
                if stype not in recipeInfo:
                    continue
                if recipeFavMethod not in sret.sonTypes:
                    continue
                selfNum = sret.num
                favNum = DictUtil.sum(sret.sonTypes[recipeFavMethod])
                # The epsilon keeps the ratio defined when selfNum is 0.
                print(name + "\t" + v + "\t" + stype + "\t%d\t%d\t%.4f" % (
                    selfNum, favNum, favNum / (selfNum + 1e-32)))
def favRate():
    """Print the favourite rate of recipe-info records for every seqHit file.

    Every entry of the seqHit data directory whose name starts with "20" is
    read line by line; lines starting with the "版本" header marker are
    skipped.  For each record whose ``stype`` is in ``recipeInfo`` and
    whose ``sret.sonTypes`` contains ``recipeFavMethod``, one tab-separated
    row is printed: file name, version, type, ``sret.num``, the summed
    favourite count, and favourites divided by ``sret.num``.

    NOTE(review): records without ``recipeFavMethod`` produce no row --
    confirm this is the intended behaviour.
    """
    data_dir = "/home/zhangzhonghui/data/seqHit/"
    for name in sorted(os.listdir(data_dir)):
        if not name.startswith("20"):
            continue
        # "with" closes each daily file as soon as it has been consumed.
        with open(data_dir + name) as handle:
            for line in handle:
                if line.startswith("版本"):
                    continue
                (v, stype, sret) = seqHit.readLine(line)
                if stype not in recipeInfo:
                    continue
                if recipeFavMethod not in sret.sonTypes:
                    continue
                selfNum = sret.num
                favNum = DictUtil.sum(sret.sonTypes[recipeFavMethod])
                # The epsilon keeps the ratio defined when selfNum is 0.
                print(name + "\t" + v + "\t" + stype + "\t%d\t%d\t%.4f" % (
                    selfNum, favNum, favNum / (selfNum + 1e-32)))
def dictSum(d):
    """Return the total stored in ``d``.

    When ``d`` carries an entry under ``SumMark`` that entry is returned
    as the total; otherwise the total is computed with ``DictUtil.sum``.
    """
    if SumMark in d:
        return d[SumMark]
    return DictUtil.sum(d)