Exemple #1
0
def addAction(action, actions, ret):
    """Fold one action into the statistics accumulated on ``ret``.

    Increments ``ret.num``, records ``len(action.pull)`` in ``ret.pages``
    and passes the son / grandson type tallies of the action (obtained
    from ``getSonTypes`` and ``getGsonTypes``) on to ``addTs``.

    Args:
        action: the action being counted; ``id`` and ``pull`` are read.
        actions: collection handed through to the two type lookups.
        ret: accumulator exposing ``num``, ``pages``, ``sonTypes``,
            ``sons``, ``gsonTypes`` and ``gsons``.
    """
    ret.num += 1
    page_count = len(action.pull)
    DictUtil.addOne(ret.pages, page_count)

    # Each lookup yields a pair; only the first element (the tally) is
    # used here, the second one is ignored.
    son_types, _son_wrong = getSonTypes(action.id, actions)
    addTs(son_types, ret.sonTypes, ret.sons)

    gson_types, _gson_wrong = getGsonTypes(action.id, actions)
    addTs(gson_types, ret.gsonTypes, ret.gsons)
Exemple #2
0
def count():
    """Group tab-separated stdin lines per user and print the ``cs`` totals.

    Input handling, line by line:

    * fewer than two columns: the line is echoed to stderr and skipped;
    * second column equal to ``"-1"``: the actions gathered so far are
      passed to ``output`` and a new user record is read with
      ``UserInfo.readUserInfo``;
    * anything else is parsed with ``Action.readAction`` and appended to
      the current user's action list, unless no user record has been
      seen yet, parsing returned ``None``, or ``action.id`` is larger
      than the number of actions collected so far (each case is reported
      on stderr and the line is skipped).

    Once stdin is exhausted the last group is passed to ``output`` and
    one ``version<TAB>stype<TAB>value`` line is printed for every entry
    of the ``cs`` mapping, which is defined outside this function.
    """
    # NOTE(review): allCount.wrongs is updated below but never reported
    # from this function -- confirm whether that is intentional.
    allCount = HitRet()
    actions = []
    userinfo = None
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < 2:
            sys.stderr.write(line)
            continue
        if cols[1] == "-1":
            # A user header line closes the previous user's group.
            output(actions)
            actions = []
            userinfo = UserInfo.readUserInfo(cols)
            continue
        if userinfo is None:
            sys.stderr.write("userinfo is None:" + line)
            continue
        action = Action.readAction(cols, userinfo)
        if action is None:
            sys.stderr.write("action is None:" + line)
            continue
        if action.id > len(actions):
            sys.stderr.write("action.id > len(actions):id%d-len:%d:\t" %
                             (action.id, len(actions)) + line)
            DictUtil.addOne(allCount.wrongs, 1)
            continue
        actions.append(action)
    # Flush the group that was still open when the input ended.
    output(actions)
    for version in cs:
        for stype in cs[version]:
            # Single-argument parenthesised print behaves the same under
            # the print statement and the print function.
            print("%s\t%s\t%s" % (version, stype, cs[version][stype]))
Exemple #3
0
def addTs(sts, totalTypes, sums):
    """Feed one tally of types into the per-type and overall accumulators.

    For every key of ``sts`` its value is recorded (via
    ``DictUtil.addOne``) in the nested dict ``totalTypes[key]``, which is
    created on first use.  The sum of all values of ``sts`` is then
    recorded in ``sums`` the same way.

    Args:
        sts: mapping from type to a numeric value.
        totalTypes: mapping from type to a nested dict, updated in place.
        sums: accumulator for the overall total, updated in place.
    """
    total = 0
    for name in sts:
        amount = sts[name]
        total += amount
        bucket = totalTypes.setdefault(name, {})
        DictUtil.addOne(bucket, amount)
    DictUtil.addOne(sums, total)
Exemple #4
0
def readFeedback(lineOrFeeds,eleDict,begin="0000-00-00 00:00:00",end="9999-99-99 00:00:00"):
	"""Count, per feed, which words and adjacent word pairs occur.

	Every entry of ``eleDict`` is first registered with
	``seg.MaxSeg.addW``.  Each feed whose ``createTime`` lies within
	``[begin, end]`` is segmented with ``seg.MaxSeg.getSeg().maxSeg``;
	every distinct word, and every distinct pair of neighbouring words
	where neither side is a sign, is passed once per feed to
	``DictUtil.addOne`` (assumed to increment the entry for that key --
	confirm against DictUtil).  Signs are the non-empty lines of
	``FeedDir + "sign.txt"`` plus space and tab.

	Pair counts are finally folded into the word counts: a pair that is
	already a counted word has its count added to that word, any other
	pair is only kept when it was counted more than 3 times.

	Args:
		lineOrFeeds: iterable of ``Feed`` instances and/or raw items that
			``parseFeedLine`` turns into a feed (or ``None``).
		eleDict: iterable of entries to register with the segmenter.
		begin: lower bound compared against ``feed.createTime``.
		end: upper bound compared against ``feed.createTime``.

	Returns:
		The dict of word (and retained pair) counts.
	"""
	signs={}
	# Use a context manager so the sign list is closed deterministically.
	with open(FeedDir+"sign.txt") as signFile:
		for line in signFile:
			w=line.strip()
			if len(w) == 0:
				continue
			signs[utf8.un(w)]=1
	signs[u" "]=1
	signs[u"\t"]=1
	wordCount={}
	biCount={}
	for e in eleDict:
		seg.MaxSeg.addW(e)
	repeats={}
	for n,ele in enumerate(lineOrFeeds,1):
		if isinstance(ele,Feed):
			feed=ele
		else:
			feed=parseFeedLine(ele,repeats)
		if feed is None or feed.createTime < begin or feed.createTime > end:
			continue
		ws=seg.MaxSeg.getSeg().maxSeg(feed.content)
		# dws remembers what was already counted for THIS feed; words and
		# pairs share the same dict, so each key counts once per feed.
		dws={}
		lastW=""
		for bn,w in enumerate(ws):
			if w not in dws:
				DictUtil.addOne(wordCount,w)
				dws[w]=1
			# The first word has no predecessor to pair with.
			if bn > 0 and w not in signs and lastW not in signs:
				pair=lastW+w
				if pair not in dws:
					DictUtil.addOne(biCount,pair)
					dws[pair]=1
			lastW=w
		# Echo the first few processed feeds (original debugging output).
		if n < 10:print(feed)
	# Discounted counting of two-word combinations.
	for bi in biCount:
		bc=biCount[bi]
		if bi not in wordCount:
			if bc > 3:wordCount[bi]=bc
		else:
			wordCount[bi]+=bc
	return wordCount
Exemple #5
0
def getSonTypes(id, actions):
    """Tally the item names of the sons reachable from ``actions[id]``.

    Direct sons are looked up in ``actions`` and their name (from
    ``hitItemName.getName``) is recorded with ``DictUtil.addOne``.  Each
    entry of ``pull`` is followed recursively and its tally is merged in
    with ``DictUtil.merge``.  Ids that are not smaller than
    ``len(actions)`` are not followed; they are only counted.

    Args:
        id: index into ``actions`` of the starting action.
        actions: sequence of actions exposing ``sons`` and ``pull``.

    Returns:
        A ``(tally, wrong)`` tuple: the tally dict and the number of
        out-of-range ids met, including those found while recursing.
    """
    tally = {}
    misses = 0
    node = actions[id]

    for son in node.sons:
        if son < len(actions):
            DictUtil.addOne(tally, hitItemName.getName(actions[son]))
        else:
            misses += 1

    for pulled in node.pull:
        if pulled < len(actions):
            sub_tally, sub_misses = getSonTypes(pulled, actions)
            DictUtil.merge(tally, sub_tally)
            misses += sub_misses
        else:
            misses += 1

    return (tally, misses)
Exemple #6
0
def count():
	"""Group tab-separated stdin lines per user and print the ``cs`` totals.

	Input handling, line by line:

	* fewer than two columns: the line is echoed to stderr and skipped;
	* second column equal to ``"-1"``: the actions gathered so far are
	  passed to ``output`` and a new user record is read with
	  ``UserInfo.readUserInfo``;
	* anything else is parsed with ``Action.readAction`` and appended to
	  the current user's action list, unless no user record has been
	  seen yet, parsing returned ``None``, or ``action.id`` is larger
	  than the number of actions collected so far (each case is reported
	  on stderr and the line is skipped).

	Once stdin is exhausted the last group is passed to ``output`` and
	one ``stype<TAB>version<TAB>value`` line is printed for every entry
	of the ``cs`` mapping, which is defined outside this function.  An
	empty ``stype`` is printed as ``__VOID__``.
	"""
	# NOTE(review): allCount.wrongs is updated below but never reported
	# from this function -- confirm whether that is intentional.
	allCount=PosRet()
	actions=[]
	userinfo=None
	for line in sys.stdin:
		cols=line.strip().split("\t")
		if len(cols) < 2:
			sys.stderr.write(line)
			continue
		if cols[1] == "-1":
			# A user header line closes the previous user's group.
			output(actions)
			actions=[]
			userinfo=UserInfo.readUserInfo(cols)
			continue
		if userinfo is None:
			sys.stderr.write("userinfo is None:"+line)
			continue
		action=Action.readAction(cols,userinfo)
		if action is None:
			sys.stderr.write("action is None:"+line)
			continue
		if action.id > len(actions):
			sys.stderr.write("action.id > len(actions):id%d-len:%d:\t"%(action.id,len(actions))+line)
			DictUtil.addOne(allCount.wrongs,1)
			continue
		actions.append(action)
	# Flush the group that was still open when the input ended.
	output(actions)
	for version in cs:
		for stype in cs[version]:
			# Look the value up with the real key before swapping in the
			# placeholder used for display.
			cstr=str(cs[version][stype])
			label=stype
			if len(label) == 0:
				label="__VOID__"
			print("%s\t%s\t%s"%(label,version,cstr))
Exemple #7
0
 def addDay(self, user, dayRet):
     """Merge one user's daily result into this aggregate.

     Registers the user through ``addChannel``, records the day's
     ``hitNum``, ``lastDiv`` and ``firstDiv`` with ``DictUtil.addOne``,
     and updates three per-key dicts:

     * ``self.mu[m]`` -- number of calls in which key ``m`` of
       ``dayRet.ms`` appeared;
     * ``self.mc[m]`` -- running sum of ``dayRet.ms[m]``;
     * ``self.tu[t]`` -- number of calls in which key ``t`` of
       ``dayRet.mts`` appeared.
     """
     self.addChannel(user)
     DictUtil.addOne(self.hits, dayRet.hitNum)
     DictUtil.addOne(self.lastDivs, dayRet.lastDiv)
     DictUtil.addOne(self.firstDivs, dayRet.firstDiv)

     for key in dayRet.ms:
         amount = dayRet.ms[key]
         if key in self.mu:
             self.mu[key] += 1
             self.mc[key] += amount
         else:
             # First sighting of this key: seed both dicts.
             self.mu[key] = 1
             self.mc[key] = amount

     for tag in dayRet.mts:
         if tag in self.tu:
             self.tu[tag] += 1
         else:
             self.tu[tag] = 1
Exemple #8
0
	def addDay(self,user,dayRet):
		"""Merge one user's daily result into this aggregate.

		Registers the user through ``addChannel``, records the day's
		``hitNum``, ``lastDiv`` and ``firstDiv`` with ``DictUtil.addOne``,
		and updates three per-key dicts:

		* ``self.mu[m]`` -- number of calls in which key ``m`` of
		  ``dayRet.ms`` appeared;
		* ``self.mc[m]`` -- running sum of ``dayRet.ms[m]``;
		* ``self.tu[t]`` -- number of calls in which key ``t`` of
		  ``dayRet.mts`` appeared.
		"""
		self.addChannel(user)
		DictUtil.addOne(self.hits,dayRet.hitNum)
		DictUtil.addOne(self.lastDivs,dayRet.lastDiv)
		DictUtil.addOne(self.firstDivs,dayRet.firstDiv)

		for key in dayRet.ms:
			amount=dayRet.ms[key]
			if key in self.mu:
				self.mu[key]+=1
				self.mc[key]+=amount
			else:
				# First sighting of this key: seed both dicts.
				self.mu[key]=1
				self.mc[key]=amount

		for tag in dayRet.mts:
			if tag in self.tu:
				self.tu[tag]+=1
			else:
				self.tu[tag]=1
Exemple #9
0
def distribute():
	"""Print how uid-less calls to the ``regMs`` methods are distributed.

	Reads tab-separated lines from stdin and ignores those with fewer
	than ``column.APP_LOG_COLUMNS`` columns.  A line is counted when its
	method column is in ``regMs`` and ``column.uid`` returns ``""`` or
	``None``; it is then recorded by version, by method and by channel
	with ``DictUtil.addOne``.  The three tallies are printed in that
	order, one ``key<TAB>count`` line per entry.
	"""
	byVersion={}
	byMethod={}
	byChannel={}
	for raw in sys.stdin:
		fields=raw.strip().split("\t")
		if len(fields) >= column.APP_LOG_COLUMNS:
			version=fields[column.VERSION_CID]
			channel=fields[column.MEDIA_CID]
			method=fields[column.METHOD_CID]
			uid=column.uid(fields)
			# An empty uid is treated the same as a missing one.
			if method in regMs and (uid == "" or uid == None):
				DictUtil.addOne(byVersion,version)
				DictUtil.addOne(byMethod,method)
				DictUtil.addOne(byChannel,channel)
	for tally in (byVersion,byMethod,byChannel):
		for key in tally:
			print("%s\t%d"%(key,tally[key]))
Exemple #10
0
def distribute():
    """Print how uid-less calls to the ``regMs`` methods are distributed.

    Reads tab-separated lines from stdin and ignores those with fewer
    than ``column.APP_LOG_COLUMNS`` columns.  A line is counted when its
    method column is in ``regMs`` and ``column.uid`` returns ``""`` or
    ``None``; it is then recorded by version, by method and by channel
    with ``DictUtil.addOne``.  The three tallies are printed in that
    order, one ``key<TAB>count`` line per entry.
    """
    by_version = {}
    by_method = {}
    by_channel = {}
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) >= column.APP_LOG_COLUMNS:
            version = fields[column.VERSION_CID]
            channel = fields[column.MEDIA_CID]
            method = fields[column.METHOD_CID]
            uid = column.uid(fields)
            # An empty uid is treated the same as a missing one.
            if method in regMs and (uid == "" or uid == None):
                DictUtil.addOne(by_version, version)
                DictUtil.addOne(by_method, method)
                DictUtil.addOne(by_channel, channel)
    for tally in (by_version, by_method, by_channel):
        for key in tally:
            print("%s\t%d" % (key, tally[key]))
Exemple #11
0
	def addTopicHit(self,topic):
		"""Record ``topic`` in ``self.topicHit`` through ``DictUtil.addOne``."""
		tally=self.topicHit
		DictUtil.addOne(tally,topic)
Exemple #12
0
	def addFoodHit(self,foodId):
		"""Record ``foodId`` in ``self.foodHit`` through ``DictUtil.addOne``."""
		tally=self.foodHit
		DictUtil.addOne(tally,foodId)
Exemple #13
0
	def addHitCount(self,hc):
		"""Record ``hc`` in ``self.hitCount`` through ``DictUtil.addOne``."""
		tally=self.hitCount
		DictUtil.addOne(tally,hc)
Exemple #14
0
	def addChannel(self,user):
		"""Count one more user and record the channel the user belongs to.

		The channel is looked up in the ``us`` mapping (defined outside
		this method) by ``user.uuid``; users without a uuid, or whose
		uuid is not in ``us``, are recorded under the empty string.
		"""
		self.userNum+=1
		uuid=user.uuid
		known=uuid != None and uuid in us
		media=us[uuid] if known else ""
		DictUtil.addOne(self.channels,media)
Exemple #15
0
	def addAlbumHit(self,album):
		"""Record ``album`` in ``self.albumHit`` through ``DictUtil.addOne``."""
		tally=self.albumHit
		DictUtil.addOne(tally,album)
Exemple #16
0
def addAction(action,actions,ret):
	"""Fold one action into the statistics accumulated on ``ret``.

	Increments ``ret.num``, records ``len(action.pull)`` in ``ret.pages``,
	passes the son type tally of the action (first element of the
	``getSonTypes`` result) together with ``ret.sons`` to ``addTs``, and
	records the value ``addTs`` returns in ``ret.sonNums``.
	"""
	ret.num+=1
	pageCount=len(action.pull)
	DictUtil.addOne(ret.pages,pageCount)
	# The second element of the pair is not used here.
	sonTypes,_wrong=getSonTypes(action.id,actions)
	hitTotal=addTs(sonTypes,ret.sons)
	DictUtil.addOne(ret.sonNums,hitTotal)
Exemple #17
0
def addAction(action, actions, ret):
    """Fold one action into the statistics accumulated on ``ret``.

    Increments ``ret.num``, records ``len(action.pull)`` in ``ret.pages``,
    passes the son type tally of the action (first element of the
    ``getSonTypes`` result) together with ``ret.sons`` to ``addTs``, and
    records the value ``addTs`` returns in ``ret.sonNums``.
    """
    ret.num += 1
    page_count = len(action.pull)
    DictUtil.addOne(ret.pages, page_count)
    # The second element of the pair is not used here.
    son_types, _wrong = getSonTypes(action.id, actions)
    hit_total = addTs(son_types, ret.sons)
    DictUtil.addOne(ret.sonNums, hit_total)
Exemple #18
0
	def addHitTitleHit(self,title,type,keyword):
		"""Record a title hit in ``self.hitTitleHit``.

		The recorded key is the result of ``hitTitle(title, keyword)``
		with ``type`` appended to it.
		"""
		key=hitTitle(title,keyword)+type
		DictUtil.addOne(self.hitTitleHit,key)
Exemple #19
0
	def addSearch(self,keyword,rid,pos):
		"""Record one shown search result.

		``keyword``, ``pos`` and ``rid`` are recorded, in that order, in
		``self.keywordShow``, ``self.posShow`` and ``self.ridShow``
		through ``DictUtil.addOne``.
		"""
		targets=(
			(self.keywordShow,keyword),
			(self.posShow,pos),
			(self.ridShow,rid),
		)
		for tally,key in targets:
			DictUtil.addOne(tally,key)
Exemple #20
0
	def addHit(self,keyword,rid,pos):
		"""Record one hit on a search result.

		``keyword``, ``pos`` and ``rid`` are recorded, in that order, in
		``self.keywordHit``, ``self.posHit`` and ``self.ridHit`` through
		``DictUtil.addOne``.
		"""
		targets=(
			(self.keywordHit,keyword),
			(self.posHit,pos),
			(self.ridHit,rid),
		)
		for tally,key in targets:
			DictUtil.addOne(tally,key)
Exemple #21
0
 def addChannel(self, user):
     """Count one more user and record the channel the user belongs to.

     The channel is looked up in the ``us`` mapping (defined outside
     this method) by ``user.uuid``; users without a uuid, or whose uuid
     is not in ``us``, are recorded under the empty string.
     """
     self.userNum += 1
     uuid = user.uuid
     known = uuid != None and uuid in us
     media = us[uuid] if known else ""
     DictUtil.addOne(self.channels, media)
Exemple #22
0
	def addSearchRet(self,ret,keyword):
		"""Record what one search returned.

		``self.search`` is incremented for every call, even when ``ret``
		is ``None``.  For a non-``None`` ``ret`` the optional keys are
		handled as follows:

		* ``"rids"`` -- the entries at positions below ``FirstPageNum``
		  are passed to ``self.addItem`` with their position;
		* ``"aid"``, ``"topicId"``, ``"food"`` -- recorded in
		  ``self.albumShow``, ``self.topicShow`` and ``self.foodShow``;
		* ``"rtitles"`` -- for positions below ``FirstPageNum``,
		  ``hitTitle(title, keyword)`` followed by the position is
		  recorded in ``self.hitTitleShow``;
		* ``"atitle"`` / ``"ttitle"`` -- same, with suffix ``"a"`` / ``"t"``.
		"""
		self.search+=1
		if ret == None:
			return

		if "rids" in ret:
			for idx,rid in enumerate(ret["rids"]):
				# Only the first page of results is recorded.
				if idx >= FirstPageNum:
					break
				self.addItem(rid,idx)

		if "aid" in ret:
			DictUtil.addOne(self.albumShow,ret["aid"])
		if "topicId" in ret:
			DictUtil.addOne(self.topicShow,ret["topicId"])
		if "food" in ret:
			DictUtil.addOne(self.foodShow,ret["food"])

		if "rtitles" in ret:
			for pos,title in enumerate(ret["rtitles"]):
				if pos >= FirstPageNum:
					break
				DictUtil.addOne(self.hitTitleShow,hitTitle(title,keyword)+"%d"%pos)

		# Album and topic titles share the same bookkeeping and differ
		# only in the suffix appended to the key.
		for field,suffix in (("atitle","a"),("ttitle","t")):
			if field in ret:
				DictUtil.addOne(self.hitTitleShow,hitTitle(ret[field],keyword)+suffix)