def addAction(action, actions, ret):
    """Fold one action into ``ret``: page tally plus son and grandson type tallies.

    Args:
        action: the action being recorded; its ``pull`` and ``id`` are read.
        actions: collection passed, together with ``action.id``, to
            ``getSonTypes`` and ``getGsonTypes``.
        ret: accumulator updated in place (``num``, ``pages``, ``sonTypes``,
            ``sons``, ``gsonTypes``, ``gsons``).
    """
    ret.num += 1
    page_count = len(action.pull)
    DictUtil.addOne(ret.pages, page_count)

    # Both helpers return a pair; the second element is not used here.
    son_types, _son_wrong = getSonTypes(action.id, actions)
    addTs(son_types, ret.sonTypes, ret.sons)

    gson_types, _gson_wrong = getGsonTypes(action.id, actions)
    addTs(gson_types, ret.gsonTypes, ret.gsons)
def count():
    """Read tab-separated log lines from stdin, group actions per user, print ``cs``.

    Input handling, line by line:
      * fewer than two columns: the line is echoed to stderr and skipped;
      * second column equal to ``"-1"``: the actions collected so far are passed
        to ``output``, the list is reset and the line is parsed as the new user
        via ``UserInfo.readUserInfo``;
      * any other line is parsed with ``Action.readAction`` for the current
        user; unparsable lines and lines seen before any user line are
        reported on stderr and skipped.

    After stdin is exhausted the last user's actions are passed to ``output``
    and every ``(version, stype, value)`` triple of the module-level ``cs``
    mapping is printed as one tab-separated line.
    """
    allCount = HitRet()
    actions = []
    userinfo = None
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < 2:
            sys.stderr.write(line)
            continue
        if cols[1] == "-1":
            # A user line closes the previous user's batch.
            output(actions)
            actions = []
            userinfo = UserInfo.readUserInfo(cols)
            continue
        if userinfo is None:
            sys.stderr.write("userinfo is None:" + line)
            continue
        action = Action.readAction(cols, userinfo)
        if action is None:
            sys.stderr.write("action is None:" + line)
            continue
        # NOTE(review): only ids beyond the end are rejected; an id smaller than
        # len(actions) is still appended at the end -- confirm that is intended.
        if action.id > len(actions):
            sys.stderr.write("action.id > len(actions):id%d-len:%d:\t" % (action.id, len(actions)) + line)
            DictUtil.addOne(allCount.wrongs, 1)
            continue
        actions.append(action)
    output(actions)

    for version in cs:
        by_type = cs[version]
        for stype in by_type:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%s\t%s\t%s" % (version, stype, by_type[stype]))
def addTs(sts, totalTypes, sums):
    """Record per-type values from ``sts`` and their overall sum.

    Args:
        sts: mapping of type -> numeric value for one item.
        totalTypes: mapping of type -> tally dict; a missing type gets an
            empty dict before ``DictUtil.addOne`` is called on it with the
            type's value as key.
        sums: tally dict on which ``DictUtil.addOne`` is called with the sum
            of all values in ``sts``.
    """
    total = 0
    for kind in sts:
        amount = sts[kind]
        total += amount
        DictUtil.addOne(totalTypes.setdefault(kind, {}), amount)
    DictUtil.addOne(sums, total)
def readFeedback(lineOrFeeds,eleDict,begin="0000-00-00 00:00:00",end="9999-99-99 00:00:00"):
    """Count words and adjacent-word pairs over a collection of feedback entries.

    Args:
        lineOrFeeds: iterable whose items are either ``Feed`` objects or raw
            lines, which are parsed with ``parseFeedLine``.
        eleDict: iterable of entries registered with ``seg.MaxSeg.addW``
            before segmentation starts.
        begin, end: inclusive bounds compared against ``feed.createTime`` with
            ``<`` / ``>``; entries outside the range are skipped.

    Returns:
        dict mapping token -> count. Each word and each pair is counted at most
        once per entry. A pair (two consecutive tokens, neither of which is
        listed in ``sign.txt`` nor is a space/tab) is merged into the result
        only if its string already exists as a word, or if it occurred in more
        than 3 entries.
    """
    # Separator tokens: pairs are never formed across any of these.
    signs = set([u" ", u"\t"])
    # Context manager so the sign file is closed instead of being leaked.
    with open(FeedDir + "sign.txt") as signFile:
        for line in signFile:
            w = line.strip()
            if w:
                signs.add(utf8.un(w))

    for e in eleDict:
        seg.MaxSeg.addW(e)

    wordCount = {}
    biCount = {}
    repeats = {}
    n = 0
    for ele in lineOrFeeds:
        n += 1
        if isinstance(ele, Feed):
            feed = ele
        else:
            feed = parseFeedLine(ele, repeats)
        if feed is None or feed.createTime < begin or feed.createTime > end:
            continue

        # Words and pairs share one per-entry "seen" set, as in the original.
        seen = set()
        lastW = None
        for w in seg.MaxSeg.getSeg().maxSeg(feed.content):
            if w not in seen:
                DictUtil.addOne(wordCount, w)
                seen.add(w)
            if lastW is not None and w not in signs and lastW not in signs:
                pair = lastW + w
                if pair not in seen:
                    DictUtil.addOne(biCount, pair)
                    seen.add(pair)
            lastW = w

        # Echo the accepted entries among the first nine inputs (existing behavior).
        if n < 10:
            print(feed)

    # Discounted counting for pairs: rare new pairs (<= 3 entries) are dropped.
    for bi in biCount:
        bc = biCount[bi]
        if bi in wordCount:
            wordCount[bi] += bc
        elif bc > 3:
            wordCount[bi] = bc
    return wordCount
def getSonTypes(id, actions):
    """Tally the type names of the sons of ``actions[id]``, following ``pull`` links.

    Direct sons are named with ``hitItemName.getName``. Every id in ``pull``
    is processed recursively and its tallies are merged in with
    ``DictUtil.merge``.

    Args:
        id: index into ``actions`` of the action to inspect.
        actions: indexable collection of actions exposing ``sons`` and ``pull``.

    Returns:
        ``(types, wrong)`` where ``types`` is the tally dict and ``wrong`` is
        the number of son/pull ids that were ``>= len(actions)``, including
        those found during recursion.
    """
    node = actions[id]
    limit = len(actions)
    type_counts = {}
    bad_refs = 0

    for son_id in node.sons:
        if son_id < limit:
            DictUtil.addOne(type_counts, hitItemName.getName(actions[son_id]))
        else:
            bad_refs += 1

    for pulled_id in node.pull:
        if pulled_id < limit:
            pulled_counts, pulled_bad = getSonTypes(pulled_id, actions)
            DictUtil.merge(type_counts, pulled_counts)
            bad_refs += pulled_bad
        else:
            bad_refs += 1

    return (type_counts, bad_refs)
def count():
    """Read tab-separated log lines from stdin, group actions per user, print ``cs``.

    Input handling, line by line:
      * fewer than two columns: the line is echoed to stderr and skipped;
      * second column equal to ``"-1"``: the actions collected so far are passed
        to ``output``, the list is reset and the line is parsed as the new user
        via ``UserInfo.readUserInfo``;
      * any other line is parsed with ``Action.readAction`` for the current
        user; unparsable lines and lines seen before any user line are
        reported on stderr and skipped.

    After stdin is exhausted the last user's actions are passed to ``output``
    and the module-level ``cs`` mapping is printed as tab-separated
    ``stype, version, value`` lines; an empty ``stype`` is shown as
    ``__VOID__``.
    """
    allCount = PosRet()
    actions = []
    userinfo = None
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < 2:
            sys.stderr.write(line)
            continue
        if cols[1] == "-1":
            # A user line closes the previous user's batch.
            output(actions)
            actions = []
            userinfo = UserInfo.readUserInfo(cols)
            continue
        if userinfo is None:
            sys.stderr.write("userinfo is None:" + line)
            continue
        action = Action.readAction(cols, userinfo)
        if action is None:
            sys.stderr.write("action is None:" + line)
            continue
        # NOTE(review): only ids beyond the end are rejected; an id smaller than
        # len(actions) is still appended at the end -- confirm that is intended.
        if action.id > len(actions):
            sys.stderr.write("action.id > len(actions):id%d-len:%d:\t" % (action.id, len(actions)) + line)
            DictUtil.addOne(allCount.wrongs, 1)
            continue
        actions.append(action)
    output(actions)

    for version in cs:
        by_type = cs[version]
        for stype in by_type:
            cstr = str(by_type[stype])
            label = stype if len(stype) != 0 else "__VOID__"
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%s\t%s\t%s" % (label, version, cstr))
def addDay(self, user, dayRet):
    """Merge one user's daily result into this accumulator.

    Args:
        user: passed to ``self.addChannel``.
        dayRet: daily result; ``hitNum``, ``lastDiv`` and ``firstDiv`` are
            tallied, ``ms`` (key -> numeric value) feeds ``self.mu`` /
            ``self.mc`` and the keys of ``mts`` feed ``self.tu``.
    """
    self.addChannel(user)
    DictUtil.addOne(self.hits, dayRet.hitNum)
    DictUtil.addOne(self.lastDivs, dayRet.lastDiv)
    DictUtil.addOne(self.firstDivs, dayRet.firstDiv)

    # mu counts days on which a key appeared; mc sums that key's values.
    for key in dayRet.ms:
        amount = dayRet.ms[key]
        if key in self.mu:
            self.mu[key] += 1
            self.mc[key] += amount
        else:
            self.mu[key] = 1
            self.mc[key] = amount

    for tag in dayRet.mts:
        if tag in self.tu:
            self.tu[tag] += 1
        else:
            self.tu[tag] = 1
def addDay(self,user,dayRet):
    """Accumulate one user's daily result.

    ``user`` is forwarded to ``self.addChannel``. ``dayRet.hitNum``,
    ``dayRet.lastDiv`` and ``dayRet.firstDiv`` are tallied via
    ``DictUtil.addOne``. For every key of ``dayRet.ms``, ``self.mu`` gains one
    occurrence and ``self.mc`` gains the key's value; for every key of
    ``dayRet.mts``, ``self.tu`` gains one occurrence.
    """
    self.addChannel(user)
    DictUtil.addOne(self.hits, dayRet.hitNum)
    DictUtil.addOne(self.lastDivs, dayRet.lastDiv)
    DictUtil.addOne(self.firstDivs, dayRet.firstDiv)

    day_values = dayRet.ms
    for name in day_values:
        first_time = name not in self.mu
        if first_time:
            self.mu[name] = 1
            self.mc[name] = day_values[name]
            continue
        self.mu[name] += 1
        self.mc[name] += day_values[name]

    for name in dayRet.mts:
        first_time = name not in self.tu
        if first_time:
            self.tu[name] = 1
            continue
        self.tu[name] += 1
def distribute():
    """Tally version, method and channel of selected log lines read from stdin.

    A line is selected when it has at least ``column.APP_LOG_COLUMNS``
    tab-separated columns, its method is in ``regMs`` and ``column.uid``
    returns ``""`` or ``None`` for it. The three tallies are then printed as
    ``key<TAB>count`` lines: versions first, then methods, then channels.
    """
    vs = {}
    ms = {}
    cs = {}
    for raw in sys.stdin:
        cols = raw.strip().split("\t")
        if len(cols) < column.APP_LOG_COLUMNS:
            continue
        version = cols[column.VERSION_CID]
        channel = cols[column.MEDIA_CID]
        method = cols[column.METHOD_CID]
        uid = column.uid(cols)
        has_no_uid = uid == "" or uid is None
        if method in regMs and has_no_uid:
            DictUtil.addOne(vs, version)
            DictUtil.addOne(ms, method)
            DictUtil.addOne(cs, channel)

    # Same output order as three consecutive loops over vs, ms and cs.
    for table in (vs, ms, cs):
        for key in table:
            print("%s\t%d" % (key, table[key]))
def distribute():
    """Count versions, methods and channels of uid-less lines whose method is in ``regMs``.

    Reads tab-separated lines from stdin, ignoring lines with fewer than
    ``column.APP_LOG_COLUMNS`` columns. A line is counted when its method is
    in ``regMs`` and ``column.uid`` yields ``""`` or ``None``. Prints
    ``key<TAB>count`` for every version, then every method, then every
    channel.
    """
    version_counts, method_counts, channel_counts = {}, {}, {}

    for line in sys.stdin:
        fields = line.strip().split("\t")
        if len(fields) < column.APP_LOG_COLUMNS:
            continue

        version = fields[column.VERSION_CID]
        channel = fields[column.MEDIA_CID]
        method = fields[column.METHOD_CID]

        uid = column.uid(fields)
        if uid == "":
            uid = None
        if uid is not None or method not in regMs:
            continue

        DictUtil.addOne(version_counts, version)
        DictUtil.addOne(method_counts, method)
        DictUtil.addOne(channel_counts, channel)

    for name in version_counts:
        print("%s\t%d" % (name, version_counts[name]))
    for name in method_counts:
        print("%s\t%d" % (name, method_counts[name]))
    for name in channel_counts:
        print("%s\t%d" % (name, channel_counts[name]))
def addTopicHit(self,topic):
    """Register one hit for ``topic`` in ``self.topicHit`` via ``DictUtil.addOne``."""
    tally = self.topicHit
    DictUtil.addOne(tally, topic)
def addFoodHit(self,foodId):
    """Register one hit for ``foodId`` in ``self.foodHit`` via ``DictUtil.addOne``."""
    tally = self.foodHit
    DictUtil.addOne(tally, foodId)
def addHitCount(self,hc):
    """Register one occurrence of the value ``hc`` in ``self.hitCount`` via ``DictUtil.addOne``."""
    tally = self.hitCount
    DictUtil.addOne(tally, hc)
def addChannel(self,user):
    """Count one more user and tally the channel found for them in ``us``.

    The key tallied in ``self.channels`` is ``us[user.uuid]`` when the user
    has a uuid present in the module-level ``us`` mapping, otherwise ``""``.
    """
    self.userNum += 1
    uuid = user.uuid
    known = uuid is not None and uuid in us
    media = us[uuid] if known else ""
    DictUtil.addOne(self.channels, media)
def addAlbumHit(self,album):
    """Register one hit for ``album`` in ``self.albumHit`` via ``DictUtil.addOne``."""
    tally = self.albumHit
    DictUtil.addOne(tally, album)
def addAction(action,actions,ret):
    """Fold one action into ``ret``: page tally, son types and son-count tally.

    Args:
        action: the action being recorded; its ``pull`` and ``id`` are read.
        actions: collection passed to ``getSonTypes`` with ``action.id``.
        ret: accumulator updated in place (``num``, ``pages``, ``sons``,
            ``sonNums``).
    """
    ret.num += 1
    DictUtil.addOne(ret.pages, len(action.pull))

    # The second element of the pair is not used here.
    son_types, _wrong = getSonTypes(action.id, actions)
    # Whatever addTs returns is used as the key tallied in ret.sonNums.
    DictUtil.addOne(ret.sonNums, addTs(son_types, ret.sons))
def addAction(action, actions, ret):
    """Update ``ret`` with one action's page count and son statistics.

    ``ret.num`` is incremented, ``len(action.pull)`` is tallied in
    ``ret.pages``, the son types returned by ``getSonTypes`` are passed to
    ``addTs`` together with ``ret.sons``, and the value ``addTs`` returns is
    tallied in ``ret.sonNums``.
    """
    ret.num += 1
    pulled = len(action.pull)
    DictUtil.addOne(ret.pages, pulled)

    result = getSonTypes(action.id, actions)
    (types, _ignored_wrong) = result
    hit_total = addTs(types, ret.sons)
    DictUtil.addOne(ret.sonNums, hit_total)
def addHitTitleHit(self,title,type,keyword):
    """Tally a title hit in ``self.hitTitleHit``.

    The tallied key is the result of ``hitTitle(title, keyword)`` with
    ``type`` appended to it.
    """
    key = hitTitle(title, keyword) + type
    DictUtil.addOne(self.hitTitleHit, key)
def addSearch(self,keyword,rid,pos):
    """Tally one shown result by keyword, position and rid.

    Updates ``self.keywordShow``, ``self.posShow`` and ``self.ridShow`` (in
    that order) through ``DictUtil.addOne``.
    """
    updates = (
        (self.keywordShow, keyword),
        (self.posShow, pos),
        (self.ridShow, rid),
    )
    for tally, key in updates:
        DictUtil.addOne(tally, key)
def addHit(self,keyword,rid,pos):
    """Tally one hit by keyword, position and rid.

    Updates ``self.keywordHit``, ``self.posHit`` and ``self.ridHit`` (in that
    order) through ``DictUtil.addOne``.
    """
    updates = (
        (self.keywordHit, keyword),
        (self.posHit, pos),
        (self.ridHit, rid),
    )
    for tally, key in updates:
        DictUtil.addOne(tally, key)
def addChannel(self, user):
    """Increment the user count and tally the user's channel.

    The channel is looked up in the module-level ``us`` mapping by
    ``user.uuid``; users without a uuid, or whose uuid is not in ``us``, are
    tallied under the empty string.
    """
    self.userNum += 1
    uuid = user.uuid
    if uuid is None or uuid not in us:
        channel = ""
    else:
        channel = us[uuid]
    DictUtil.addOne(self.channels, channel)
def addSearchRet(self,ret,keyword):
    """Record what one search displayed.

    ``self.search`` is always incremented. When ``ret`` is not ``None`` the
    following optional keys are processed:

      * ``"rids"``: the first ``FirstPageNum`` entries are passed to
        ``self.addItem`` together with their position;
      * ``"aid"``, ``"topicId"``, ``"food"``: tallied in ``self.albumShow``,
        ``self.topicShow`` and ``self.foodShow`` respectively;
      * ``"rtitles"``: for the first ``FirstPageNum`` titles,
        ``hitTitle(title, keyword)`` followed by the position is tallied in
        ``self.hitTitleShow``;
      * ``"atitle"`` / ``"ttitle"``: same tally with suffix ``"a"`` / ``"t"``.

    Args:
        ret: mapping describing the search result, or ``None``.
        keyword: the searched keyword, forwarded to ``hitTitle``.
    """
    self.search += 1
    if ret is None:
        return

    if "rids" in ret:
        for rank, rid in enumerate(ret["rids"]):
            if rank >= FirstPageNum:
                break
            self.addItem(rid, rank)

    if "aid" in ret:
        DictUtil.addOne(self.albumShow, ret["aid"])
    if "topicId" in ret:
        DictUtil.addOne(self.topicShow, ret["topicId"])
    if "food" in ret:
        DictUtil.addOne(self.foodShow, ret["food"])

    if "rtitles" in ret:
        for rank, title in enumerate(ret["rtitles"]):
            if rank >= FirstPageNum:
                break
            DictUtil.addOne(self.hitTitleShow, hitTitle(title, keyword) + "%d" % rank)

    # Album and topic titles share the tally, distinguished by a suffix.
    for field, suffix in (("atitle", "a"), ("ttitle", "t")):
        if field in ret:
            DictUtil.addOne(self.hitTitleShow, hitTitle(ret[field], keyword) + suffix)