예제 #1
0
def sonFavRate():
	print "日期\t版本\t栏目名\t栏目进入数\t列表页菜谱点击数\t菜谱收藏数\t菜谱点击率\t菜谱收藏率\t栏目菜谱收藏率"
	files=os.listdir("/home/zhangzhonghui/data/seqHit/")
	for file in sorted(files):
		if not file.startswith("20"):
			continue
		day=file[file.rfind("/")+1:]
		ts={}
		for line in open("/home/zhangzhonghui/data/seqHit/"+file):
			if line.startswith("版本"):
				continue
			(v,stype,sret)=seqHit.readLine(line)
			selfNum=0
			for st in sret.sonTypes:
				if st in recipeInfo:
					selfNum+=DictUtil.sum(sret.sonTypes[st])
			if selfNum > 0:
				if recipeFavMethod in sret.gsonTypes:
					favNum=DictUtil.sum(sret.gsonTypes[recipeFavMethod])
					if stype not in ts:
						ts[stype]=[0,0,0]
					ts[stype][0]+=sret.num
					ts[stype][1]+=selfNum
					ts[stype][2]+=favNum
					print day+"\t"+v+"\t"+stype+favStr(sret.num,selfNum,favNum)
#"\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f"%(sret.num,selfNum,favNum,selfNum/(sret.num+1e-32),favNum/(selfNum+1e-32),favNum/(sret.num+1e-32))
		for stype in ts:
			(num,hnum,favNum)=ts[stype]
			print day+"\tall_version\t"+stype+favStr(num,hnum,favNum)
예제 #2
0
파일: seqHit.py 프로젝트: rainly/scripts-1
def count():
    allCount = HitRet()
    actions = []
    userinfo = None
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < 2:
            sys.stderr.write(line)
            continue
        if cols[1] == "-1":
            output(actions)
            actions = []
            userinfo = UserInfo.readUserInfo(cols)
            continue
        if userinfo == None:
            sys.stderr.write("userinfo is None:" + line)
            continue
        action = Action.readAction(cols, userinfo)
        if action == None:
            sys.stderr.write("action is None:" + line)
            continue
        if action.id > len(actions):
            sys.stderr.write("action.id > len(actions):id%d-len:%d:\t" %
                             (action.id, len(actions)) + line)
            DictUtil.addOne(allCount.wrongs, 1)
            continue
        actions.append(action)
    output(actions)
    for version in cs:
        for stype in cs[version]:
            p = stype.find("_")
            if p < 0:
                p = len(stype)
            method = stype[0:p]
            print "%s\t%s\t%s" % (version, stype, cs[version][stype])
예제 #3
0
파일: seqHit.py 프로젝트: rainly/scripts-1
def addAction(action, actions, ret):
    """Fold one action into ``ret``.

    Bumps ``ret.num``, passes the length of ``action.pull`` to
    ``DictUtil.addOne`` on ``ret.pages``, then feeds the type dicts returned
    by ``getSonTypes`` and ``getGsonTypes`` through ``addTs``.
    """
    ret.num += 1
    DictUtil.addOne(ret.pages, len(action.pull))
    # Only the type dict (first element) of each result is used; the second
    # element is ignored here.
    sonTypeCounts = getSonTypes(action.id, actions)[0]
    addTs(sonTypeCounts, ret.sonTypes, ret.sons)
    gsonTypeCounts = getGsonTypes(action.id, actions)[0]
    addTs(gsonTypeCounts, ret.gsonTypes, ret.gsons)
예제 #4
0
def sonFavRate():
    print "日期\t版本\t栏目名\t栏目进入数\t列表页菜谱点击数\t菜谱收藏数\t菜谱点击率\t菜谱收藏率\t栏目菜谱收藏率"
    files = os.listdir("/home/zhangzhonghui/data/seqHit/")
    for file in sorted(files):
        if not file.startswith("20"):
            continue
        day = file[file.rfind("/") + 1:]
        ts = {}
        for line in open("/home/zhangzhonghui/data/seqHit/" + file):
            if line.startswith("版本"):
                continue
            (v, stype, sret) = seqHit.readLine(line)
            selfNum = 0
            for st in sret.sonTypes:
                if st in recipeInfo:
                    selfNum += DictUtil.sum(sret.sonTypes[st])
            if selfNum > 0:
                if recipeFavMethod in sret.gsonTypes:
                    favNum = DictUtil.sum(sret.gsonTypes[recipeFavMethod])
                    if stype not in ts:
                        ts[stype] = [0, 0, 0]
                    ts[stype][0] += sret.num
                    ts[stype][1] += selfNum
                    ts[stype][2] += favNum
                    print day + "\t" + v + "\t" + stype + favStr(
                        sret.num, selfNum, favNum)


#"\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f"%(sret.num,selfNum,favNum,selfNum/(sret.num+1e-32),favNum/(selfNum+1e-32),favNum/(sret.num+1e-32))
        for stype in ts:
            (num, hnum, favNum) = ts[stype]
            print day + "\tall_version\t" + stype + favStr(num, hnum, favNum)
예제 #5
0
파일: seqHit.py 프로젝트: rainly/scripts-1
def addTs(sts, totalTypes, sums):
    """Fold the per-type counts of one action into the running totals.

    For every type in ``sts`` its count is passed to ``DictUtil.addOne`` on
    ``totalTypes[type]`` (created as an empty dict on first use); the sum of
    all counts is passed to ``DictUtil.addOne`` on ``sums``.
    """
    total = 0
    for typeName, cnt in sts.items():
        total += cnt
        DictUtil.addOne(totalTypes.setdefault(typeName, {}), cnt)
    DictUtil.addOne(sums, total)
예제 #6
0
 def merge(self, other):
     """Accumulate ``other`` into this object.

     Adds ``other.num``, merges ``pages`` and ``sonNums`` through
     ``DictUtil.merge`` and adds the three per-item values of
     ``other.sons`` element-wise.
     """
     self.num += other.num
     DictUtil.merge(self.pages, other.pages)
     DictUtil.merge(self.sonNums, other.sonNums)
     for item, theirs in other.sons.items():
         # Items seen for the first time start from a zeroed triple.
         mine = self.sons.setdefault(item, [0, 0, 0])
         for i in range(3):
             mine[i] += theirs[i]
예제 #7
0
파일: posHit.py 프로젝트: wgzhao/scripts
	def merge(self,other):
		"""Accumulate ``other`` into this object.

		Adds ``other.num``, merges ``pages`` and ``sonNums`` through
		``DictUtil.merge`` and adds the three per-item values of
		``other.sons`` element-wise.
		"""
		self.num+=other.num
		DictUtil.merge(self.pages,other.pages)
		DictUtil.merge(self.sonNums,other.sonNums)
		for item,theirs in other.sons.items():
			# Items seen for the first time start from a zeroed triple.
			mine=self.sons.setdefault(item,[0,0,0])
			for i in range(3):
				mine[i]+=theirs[i]
예제 #8
0
파일: feedback.py 프로젝트: wgzhao/scripts
def testReadAllDict():
	f_handler=open(FeedDir+'feedback.log', 'w')
	sys.stdout=f_handler	
	classDict,tDict,wDict,eleDict=readAllDict()
	print DictUtil.dictStr(tDict)
	print DictUtil.dictStr(wDict)
	print DictUtil.dictStr(classDict)
	print DictUtil.dictStr(eleDict)
예제 #9
0
def testReadAllDict():
    f_handler = open(FeedDir + 'feedback.log', 'w')
    sys.stdout = f_handler
    classDict, tDict, wDict, eleDict = readAllDict()
    print DictUtil.dictStr(tDict)
    print DictUtil.dictStr(wDict)
    print DictUtil.dictStr(classDict)
    print DictUtil.dictStr(eleDict)
예제 #10
0
def readFeedback(lineOrFeeds,eleDict,begin="0000-00-00 00:00:00",end="9999-99-99 00:00:00"):
	"""Collect word and adjacent-pair counts over a set of feedback entries.

	lineOrFeeds -- iterable whose elements are either Feed objects or raw
		values that are parsed with parseFeedLine.
	eleDict -- iterable of entries registered with seg.MaxSeg.addW before
		any content is segmented.
	begin, end -- bounds compared directly against feed.createTime; entries
		outside the range are skipped.

	Returns the wordCount dict.  Each word and each adjacent pair is passed
	to DictUtil.addOne at most once per feed (presumably incrementing its
	count -- confirm against DictUtil).
	"""
	# Tokens that may not take part in a pair: every non-empty line of
	# sign.txt plus the space and tab characters.
	signs={}
	for line in open(FeedDir+"sign.txt"):
		w=line.strip()
		if len(w) == 0:
			continue
		signs[utf8.un(line.strip())]=1
	signs[u" "]=1
	signs[u"\t"]=1
	n=0
	wordCount={}
	biCount={}
	for e in eleDict:
		seg.MaxSeg.addW(e)
	repeats={}
	for ele in lineOrFeeds:
		n+=1
		# n is at least 1 here, so this guard never fires as written.
		if n <=0:continue
		if type(ele) == Feed:
			feed=ele
		else:
			feed=parseFeedLine(ele,repeats)
		if feed == None or (feed.createTime < begin or feed.createTime > end):
			#if feed != None:print feed.createTime
			continue
		ws=seg.MaxSeg.getSeg().maxSeg(feed.content)
		# dws remembers what was already counted for this feed.
		dws={}
		lastW=""
		bn=0
		for w in ws:
			if w not in dws:
				DictUtil.addOne(wordCount,w)
				dws[w]=1
			# From the second token on, pair it with its predecessor unless
			# either of the two is listed in signs.
			if bn > 0 and w not in signs and lastW not in signs:
				if lastW+w not in dws:
					DictUtil.addOne(biCount,lastW+w)
					dws[lastW+w]=1
			bn+=1
			lastW=w
		# Echo the first nine elements for inspection.
		if n < 10:print feed
		#else:break
	for bi in biCount:  # discounted counting of the two-token combinations
		bc=biCount[bi]
		if bi not in wordCount:
			# A pair that is not itself a word is only kept when it was
			# counted more than three times.
			if bc > 3:wordCount[bi]=bc
		else:
			wordCount[bi]+=bc
	return wordCount
예제 #11
0
def ds(uuidFile,today):
	regUserFile="/home/zhangzhonghui/data/reg/regUser."+today
	uuids={}
	for line in open(regUserFile):
		cols=line.strip().split()
		uuids[cols[0]]=[cols[1],""]
	for line in open(uuidFile):
		cols=line.split()
		if len(cols) < 4:continue
		uuid=cols[0]
		if uuid not in uuids:continue
		day=cols[2]
		if uuids[uuid][1] == "":
			uuids[uuid][1]=day
		else:
			if uuids[uuid][1] > day:
				uuids[uuid][1]=day
	ds={}
	for uuid in uuids:
		OK,lastDay=uuids[uuid]
		if OK not in ds:
			ds[OK]={}
		div=TimeUtil.daysDiv(lastDay,today)
		if div not in ds[OK]:
			ds[OK][div]=1
		else:
			ds[OK][div]+=1
	for OK in ds:
		print OK,DictUtil.sum(ds[OK])
		print ds[OK]
예제 #12
0
def reportSonsFile(method,version,file,ts,dts,v2=100000):
	# Accumulate, for one data file, the sub-item totals of every row whose
	# version lies in [version, v2] and whose stype starts with `method`.
	#   ts  -- dict that receives every type name seen (value 1)
	#   dts -- list of (file, {type name: total}) tuples; an entry for `file`
	#          is appended when the last entry belongs to another file
	# NOTE(review): the snippet is cut off after the triple-quote that
	# follows this code, so anything later in the function is not visible.
	if True:
		for line in open(file):
			# Presumably the header row of the data file.
			if line.startswith("版本"):
				continue
			if True:
				try:
					(v,stype,sret)=seqHit.readLine(line)
				except:
					# Lines that cannot be parsed are echoed and skipped.
					sys.stderr.write(line)
					continue
				# Skip rows whose num is below 1.
				tnum=sret.num+1e-32
				if tnum < 1.0:
					continue
				# A version that is not an integer is treated as 0.
				try:
					v=int(v)
				except:
					v=0
				if v >= version and v <= v2 and stype.startswith(method):
					if len(dts) <= 0 or dts[-1][0] != file:
						dts.append((file,{}))
					for type in sret.sonTypes:
						rate=DictUtil.sum(sret.sonTypes[type])
						# Totals are keyed by the name from getTypeName.
						type=hitItemName.getTypeName(type)
						if type not in dts[-1][1]:
							ts[type]=1
							dts[-1][1][type]=0
						dts[-1][1][type]+=rate
	'''
예제 #13
0
def ds(uuidFile, today):
    regUserFile = "/home/zhangzhonghui/data/reg/regUser." + today
    uuids = {}
    for line in open(regUserFile):
        cols = line.strip().split()
        uuids[cols[0]] = [cols[1], ""]
    for line in open(uuidFile):
        cols = line.split()
        if len(cols) < 4: continue
        uuid = cols[0]
        if uuid not in uuids: continue
        day = cols[2]
        if uuids[uuid][1] == "":
            uuids[uuid][1] = day
        else:
            if uuids[uuid][1] > day:
                uuids[uuid][1] = day
    ds = {}
    for uuid in uuids:
        OK, lastDay = uuids[uuid]
        if OK not in ds:
            ds[OK] = {}
        div = TimeUtil.daysDiv(lastDay, today)
        if div not in ds[OK]:
            ds[OK][div] = 1
        else:
            ds[OK][div] += 1
    for OK in ds:
        print OK, DictUtil.sum(ds[OK])
        print ds[OK]
예제 #14
0
파일: seqHit.py 프로젝트: rainly/scripts-1
def getSonTypes(id, actions):
    """Collect the names of the sons of action ``id``, following its pulls.

    Every son index of ``actions[id]`` is resolved to a name with
    ``hitItemName.getName`` and passed to ``DictUtil.addOne``; every index in
    its ``pull`` list is processed recursively and merged in.  Indices that
    fall outside ``actions`` are counted instead of resolved.

    Returns ``(ts, wrong)``: the name dict and the number of bad indices.
    """
    wrong = 0
    ts = {}
    node = actions[id]
    for sid in node.sons:
        if sid < len(actions):
            DictUtil.addOne(ts, hitItemName.getName(actions[sid]))
        else:
            wrong += 1
    for pid in node.pull:
        if pid < len(actions):
            (pulledTypes, pulledWrong) = getSonTypes(pid, actions)
            DictUtil.merge(ts, pulledTypes)
            wrong += pulledWrong
        else:
            wrong += 1
    return (ts, wrong)
예제 #15
0
def reportSonsFile(method, version, file, ts, dts, v2=100000):
    # Accumulate, for one data file, the sub-item totals of every row whose
    # version lies in [version, v2] and whose stype starts with `method`.
    #   ts  -- dict that receives every type name seen (value 1)
    #   dts -- list of (file, {type name: total}) tuples; an entry for `file`
    #          is appended when the last entry belongs to another file
    # NOTE(review): the snippet is cut off after the triple-quote that
    # follows this code, so anything later in the function is not visible.
    if True:
        for line in open(file):
            # Presumably the header row of the data file.
            if line.startswith("版本"):
                continue
            if True:
                try:
                    (v, stype, sret) = seqHit.readLine(line)
                except:
                    # Lines that cannot be parsed are echoed and skipped.
                    sys.stderr.write(line)
                    continue
                # Skip rows whose num is below 1.
                tnum = sret.num + 1e-32
                if tnum < 1.0:
                    continue
                # A version that is not an integer is treated as 0.
                try:
                    v = int(v)
                except:
                    v = 0
                if v >= version and v <= v2 and stype.startswith(method):
                    if len(dts) <= 0 or dts[-1][0] != file:
                        dts.append((file, {}))
                    for type in sret.sonTypes:
                        rate = DictUtil.sum(sret.sonTypes[type])
                        # Totals are keyed by the name from getTypeName.
                        type = hitItemName.getTypeName(type)
                        if type not in dts[-1][1]:
                            ts[type] = 1
                            dts[-1][1][type] = 0
                        dts[-1][1][type] += rate
    '''
예제 #16
0
def count():
	"""Print one row per daily searchKeyword45 file comparing tag queries with all queries.

	The tag list is read from a fixed file; every tag is registered both as
	is and with a "v45_" prefix.  The directory named by sys.argv[1] is
	scanned in sorted order for files starting with "searchKeyword45.2014-".
	Within a file, rows whose key ends with "##total##" are merged into
	``ack`` and rows whose key is a registered tag are merged into ``tck``;
	the printed row contains counts and ratios derived from both.
	"""
	ts={}
	tagFile="/home/zhangzhonghui/data/backup.1203/tagQuery.all.txt"
	#tagFile="/home/zhangzhonghui/data/backup.1202/tag.txt"
	for line in open(tagFile):
		ts[line.strip()]=1
		ts["v45_"+line.strip()]=1
	dir=sys.argv[1]
	files=os.listdir(dir)
	files=sorted(files)
	for file in files:
		# Accumulators: tck for tag rows, ack for the "##total##" rows.
		tck=clickCount.Click()
		ack=clickCount.Click()
		if not file.startswith("searchKeyword45.2014-"):
			continue
		for line in open(dir+"/"+file):
			cols=line.strip().split("\t")
			k=cols[0]
			if k.startswith(searchKeyword45.CardFix):
				continue
			if k.endswith("##total##"):
				(k,click)=clickCount.readClick(cols)
				ack.merge(click)
				continue
			if k not in ts:
				continue
			try:
				(k,click)=clickCount.readClick(cols)
			except:
				# Rows that cannot be parsed are echoed and skipped.
				sys.stderr.write(line)
				continue
			tck.merge(click)
			# The per-row values below only feed the commented-out prints.
			voidHit=0
			if 0 in click.hitCount:
				voidHit=click.hitCount[0]
			hit=DictUtil.sum(click.hit)
			if clickCount.SumMark in click.hit:
				hit=click.hit[clickCount.SumMark]
			if click.search==0:
				click.search+=1e-12
			#print k,click.search
			#print "%s\t%s\t%d\t%d\t%d\t%f\t%f"%(file[-10:],k,click.search,hit,(click.search-voidHit),float(hit)/click.search,float(click.search-voidHit)/click.search)
			#print click
		hit=clickCount.dictSum(tck.hit)
		voidHit=0
		if 0 in tck.hitCount:
			voidHit=tck.hitCount[0]
		k="all"
		# Avoid dividing by zero when no tag row was merged.
		if tck.search == 0:
			tck.search+=1e-3
		# NOTE(review): ack.search gets no such guard and ack.hit /
		# ack.hitCount are indexed without a membership test, so a file
		# without a usable "##total##" row looks like it would raise here --
		# confirm.
		tn=float(hit)/tck.search
		an=float(ack.hit[clickCount.SumMark])/ack.search
		trate=float(tck.search-voidHit)/tck.search
		arate=float(ack.search-ack.hitCount[0])/ack.search
		
		#print "%s\t%f\t%f"%(file[-10:],float(tck.search)/ack.search,trate/arate)
		print "%s\t%d\t%d\t%d\t%d\t%d\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f"%(file[-10:],ack.search,ack.hit[clickCount.SumMark],tck.search,hit,(tck.search-voidHit),float(hit)/tck.search,trate,arate,tn/an,trate/arate)
예제 #17
0
def cosineWeit(d1, d2):
    """Return ``DictUtil.cosine`` of ``d1`` and ``d2`` after tag weighting.

    Keys missing from the module-level ``tagWeit`` are dropped; the remaining
    values are multiplied by their ``tagWeit`` entry.
    """

    def weighted(d):
        # Keep only the weighted keys, scaled by their weight.
        return dict((w, d[w] * tagWeit[w]) for w in d if w in tagWeit)

    return DictUtil.cosine(weighted(d1), weighted(d2))
예제 #18
0
파일: weitTag.py 프로젝트: wgzhao/scripts
def cosineWeit(d1,d2):
	"""Return ``DictUtil.cosine`` of ``d1`` and ``d2`` after tag weighting.

	Keys missing from the module-level ``tagWeit`` are dropped; the remaining
	values are multiplied by their ``tagWeit`` entry.
	"""

	def weighted(d):
		# Keep only the weighted keys, scaled by their weight.
		return dict((w,d[w]*tagWeit[w]) for w in d if w in tagWeit)

	return DictUtil.cosine(weighted(d1),weighted(d2))
예제 #19
0
 def __str__(self):
     """Return the action as one tab-separated line.

     Field order: user name, id, time, method, para, response, hit, sons,
     pull, parentId, parentMethod, item, result items.
     """
     fields = [
         self.user.name(),
         str(self.id),
         self.time,
         self.method,
         self.para,
         str(self.response),
         str(self.hit),
         str(self.sons),
         str(self.pull),
         str(self.parentId),
         self.parentMethod,
         str(self.getItem()),
         DictUtil.listStr(self.resultItems()),
     ]
     return "\t".join(fields)
예제 #20
0
파일: cardClick.py 프로젝트: wgzhao/scripts
	def merge(self,cc):
		"""Replace each counter dict with ``DictUtil.merge`` of it and ``cc``'s.

		Applies to posShow, posHit, keywordHit, keywordShow, ridShow and
		ridHit, in that order.
		"""
		for attr in ("posShow","posHit","keywordHit","keywordShow","ridShow","ridHit"):
			merged=DictUtil.merge(getattr(self,attr),getattr(cc,attr))
			setattr(self,attr,merged)
예제 #21
0
파일: tt.py 프로젝트: wgzhao/scripts
def t7(tag="增长下降比:"):
	"""Summarise the numbers that follow ``tag`` on the lines of stdin.

	On every line containing ``tag`` the text between the tag and the next
	delimiter is parsed as a float.  The delimiter is the first "=" after the
	tag; if there is none, the first space; if there is none, the first tab;
	otherwise the value runs to the end of the line.  The statistics returned
	by DictUtil.statis for the collected values are written to stderr.  When
	no line matches, a short notice is written instead.
	"""
	vs=[]
	for line in sys.stdin:
		p=line.find(tag)
		if p < 0:
			continue
		start=p+len(tag)
		for sep in ("="," ","\t"):
			stop=line.find(sep,start)
			if stop >= 0:
				break
		else:
			# No delimiter at all: take the rest of the line.  float()
			# ignores the trailing newline, and a final line without one no
			# longer loses its last character.
			stop=len(line)
		vs.append(float(line[start:stop]))
	if not vs:
		# Nothing to summarise; avoid dividing by len(vs) == 0 below.
		sys.stderr.write("no value found for tag:"+tag+"\n")
		return
	(total,avg,std,s0)=DictUtil.statis(vs)
	sys.stderr.write("sum:%.4f    loss:%d,%.2f    avg:%.2f%%    std:%.4f    avg/std:%.4f\n"%(total,s0,float(s0)/len(vs),avg*100,std,avg/(std+1e-64)))
예제 #22
0
def topnRateStr(d, n=500):
    """Format the ``n`` largest entries of ``d`` as "(key,count,share)" groups.

    Entries are ordered by value, largest first.  The share is the value
    divided by ``DictUtil.sum(d)`` (a zero sum is replaced by 1e-32).  The
    groups are concatenated and wrapped in square brackets.
    """
    total = DictUtil.sum(d)
    if total == 0:
        total += 1e-32
    ranked = sorted(d.items(), key=lambda e: e[1], reverse=True)
    parts = []
    for rank, (w, c) in enumerate(ranked):
        if rank >= n:
            break
        parts.append("(%s,%d,%.4f)" % (str(w), c, float(c) / total))
    return "[" + "".join(parts) + "]"
예제 #23
0
파일: analysis.py 프로젝트: wgzhao/scripts
def topnRateStr(d,n=500):
	"""Format the ``n`` largest entries of ``d`` as "(key,count,share)" groups.

	Entries are ordered by value, largest first.  The share is the value
	divided by ``DictUtil.sum(d)`` (a zero sum is replaced by 1e-32).  The
	groups are concatenated and wrapped in square brackets.
	"""
	total=DictUtil.sum(d)
	if total == 0:
		total+=1e-32
	ranked=sorted(d.items(),key=lambda e:e[1],reverse=True)
	parts=[]
	for rank,(w,c) in enumerate(ranked):
		if rank >= n:
			break
		parts.append("(%s,%d,%.4f)"%(str(w),c,float(c)/total))
	return "["+"".join(parts)+"]"
예제 #24
0
파일: posHit.py 프로젝트: wgzhao/scripts
def count():
	allCount=PosRet()
	actions=[]
	userinfo=None
	for line in sys.stdin:
		cols=line.strip().split("\t")
		if len(cols) < 2:
			sys.stderr.write(line)
			continue
		if cols[1] == "-1":
			output(actions)
			actions=[]
			userinfo=UserInfo.readUserInfo(cols)
			continue
		if userinfo == None:
			sys.stderr.write("userinfo is None:"+line)
			continue
		action=Action.readAction(cols,userinfo)
		if action == None:
			sys.stderr.write("action is None:"+line)
			continue
		if action.id > len(actions):
			sys.stderr.write("action.id > len(actions):id%d-len:%d:\t"%(action.id,len(actions))+line)
			DictUtil.addOne(allCount.wrongs,1)
			continue
		actions.append(action)
	output(actions)
	for version in cs:
		for stype in cs[version]:
			p=stype.find("_")
			if p < 0:
				p=len(stype)
			method=stype[0:p]
			cstr=str(cs[version][stype])
			if len(stype) == 0:
				stype="__VOID__"
			print "%s\t%s\t%s"%(stype,version,cstr)
예제 #25
0
def favRate():
	files=os.listdir("/home/zhangzhonghui/data/seqHit/")
	for file in sorted(files):
		if not file.startswith("20"):
			continue
		for line in open("/home/zhangzhonghui/data/seqHit/"+file):
			if line.startswith("版本"):
				continue
			(v,stype,sret)=seqHit.readLine(line)
			if stype not in recipeInfo:
				continue
			selfNum=sret.num
			if recipeFavMethod in sret.sonTypes:
				favNum=DictUtil.sum(sret.sonTypes[recipeFavMethod])
				print file+"\t"+v+"\t"+stype+"\t%d\t%d\t%.4f"%(selfNum,favNum,favNum/(selfNum+1e-32))
예제 #26
0
파일: tt.py 프로젝트: rainly/scripts-1
def t7(tag="增长下降比:"):
    """Summarise the numbers that follow ``tag`` on the lines of stdin.

    On every line containing ``tag`` the text between the tag and the next
    delimiter is parsed as a float.  The delimiter is the first "=" after the
    tag; if there is none, the first space; if there is none, the first tab;
    otherwise the value runs to the end of the line.  The statistics returned
    by DictUtil.statis for the collected values are written to stderr.  When
    no line matches, a short notice is written instead.
    """
    vs = []
    for line in sys.stdin:
        p = line.find(tag)
        if p < 0:
            continue
        start = p + len(tag)
        for sep in ("=", " ", "\t"):
            stop = line.find(sep, start)
            if stop >= 0:
                break
        else:
            # No delimiter at all: take the rest of the line.  float()
            # ignores the trailing newline, and a final line without one no
            # longer loses its last character.
            stop = len(line)
        vs.append(float(line[start:stop]))
    if not vs:
        # Nothing to summarise; avoid dividing by len(vs) == 0 below.
        sys.stderr.write("no value found for tag:" + tag + "\n")
        return
    (total, avg, std, s0) = DictUtil.statis(vs)
    sys.stderr.write(
        "sum:%.4f    loss:%d,%.2f    avg:%.2f%%    std:%.4f    avg/std:%.4f\n"
        % (total, s0, float(s0) / len(vs), avg * 100, std, avg / (std + 1e-64)))
예제 #27
0
def favRate():
    files = os.listdir("/home/zhangzhonghui/data/seqHit/")
    for file in sorted(files):
        if not file.startswith("20"):
            continue
        for line in open("/home/zhangzhonghui/data/seqHit/" + file):
            if line.startswith("版本"):
                continue
            (v, stype, sret) = seqHit.readLine(line)
            if stype not in recipeInfo:
                continue
            selfNum = sret.num
            if recipeFavMethod in sret.sonTypes:
                favNum = DictUtil.sum(sret.sonTypes[recipeFavMethod])
                print file + "\t" + v + "\t" + stype + "\t%d\t%d\t%.4f" % (
                    selfNum, favNum, favNum / (selfNum + 1e-32))
예제 #28
0
파일: tt.py 프로젝트: wgzhao/scripts
def t3():
	"""Summarise the growth-rate values found on stdin.

	Only lines starting with the growth-rate prefix are used.  The value is
	the text between the prefix and the first tab (or the end of the line
	when there is no tab); values that do not parse as a float or whose
	magnitude exceeds 100 are skipped.  The statistics returned by
	DictUtil.statis are written to stderr.  When no value is collected, a
	short notice is written instead.
	"""
	prefix="增长率:"
	vs=[]
	for line in sys.stdin:
		if not line.startswith(prefix):
			continue
		p=line.find("\t")
		if p < 0:
			# No tab: use the rest of the line instead of silently dropping
			# its last character.
			p=len(line)
		try:
			v=float(line[len(prefix):p])
		except ValueError:
			continue
		# Outliers are ignored.
		if abs(v) > 100:
			continue
		vs.append(v)
	if not vs:
		# Nothing to summarise; avoid dividing by len(vs) == 0 below.
		sys.stderr.write("no value found with prefix:"+prefix+"\n")
		return
	(total,avg,std,s0)=DictUtil.statis(vs)
	# The tiny epsilon keeps avg/std defined when all values are identical.
	sys.stderr.write("sum:%.4f    loss:%d,%.2f    avg:%.2f%%    std:%.4f    avg/std:%.4f\n"%(total,s0,float(s0)/len(vs),avg*100,std,avg/(std+1e-64)))
예제 #29
0
파일: analysis.py 프로젝트: wgzhao/scripts
	def addDay(self,user,dayRet):
		"""Fold one user's daily result into the aggregate.

		Registers the user's channel, passes the day's hitNum, lastDiv and
		firstDiv to ``DictUtil.addOne`` on the matching dicts, then updates
		the per-key tallies: ``mu`` gains 1 and ``mc`` gains the day's value
		for every key of ``dayRet.ms``; ``tu`` gains 1 for every entry of
		``dayRet.mts``.
		"""
		self.addChannel(user)
		for hist,key in ((self.hits,dayRet.hitNum),(self.lastDivs,dayRet.lastDiv),(self.firstDivs,dayRet.firstDiv)):
			DictUtil.addOne(hist,key)
		for m,cnt in dayRet.ms.items():
			# Membership in mu decides for both mu and mc.
			if m in self.mu:
				self.mu[m]+=1
				self.mc[m]+=cnt
			else:
				self.mu[m]=1
				self.mc[m]=cnt
		for t in dayRet.mts:
			self.tu[t]=self.tu.get(t,0)+1
예제 #30
0
 def addDay(self, user, dayRet):
     """Fold one user's daily result into the aggregate.

     Registers the user's channel, passes the day's hitNum, lastDiv and
     firstDiv to ``DictUtil.addOne`` on the matching dicts, then updates the
     per-key tallies: ``mu`` gains 1 and ``mc`` gains the day's value for
     every key of ``dayRet.ms``; ``tu`` gains 1 for every entry of
     ``dayRet.mts``.
     """
     self.addChannel(user)
     histograms = (
         (self.hits, dayRet.hitNum),
         (self.lastDivs, dayRet.lastDiv),
         (self.firstDivs, dayRet.firstDiv),
     )
     for hist, key in histograms:
         DictUtil.addOne(hist, key)
     for m, cnt in dayRet.ms.items():
         # Membership in mu decides for both mu and mc.
         if m in self.mu:
             self.mu[m] += 1
             self.mc[m] += cnt
         else:
             self.mu[m] = 1
             self.mc[m] = cnt
     for t in dayRet.mts:
         self.tu[t] = self.tu.get(t, 0) + 1
예제 #31
0
파일: tt.py 프로젝트: rainly/scripts-1
def t3():
    """Summarise the growth-rate values found on stdin.

    Only lines starting with the growth-rate prefix are used.  The value is
    the text between the prefix and the first tab (or the end of the line
    when there is no tab); values that do not parse as a float or whose
    magnitude exceeds 100 are skipped.  The statistics returned by
    DictUtil.statis are written to stderr.  When no value is collected, a
    short notice is written instead.
    """
    prefix = "增长率:"
    vs = []
    for line in sys.stdin:
        if not line.startswith(prefix):
            continue
        p = line.find("\t")
        if p < 0:
            # No tab: use the rest of the line instead of silently dropping
            # its last character.
            p = len(line)
        try:
            v = float(line[len(prefix):p])
        except ValueError:
            continue
        # Outliers are ignored.
        if abs(v) > 100:
            continue
        vs.append(v)
    if not vs:
        # Nothing to summarise; avoid dividing by len(vs) == 0 below.
        sys.stderr.write("no value found with prefix:" + prefix + "\n")
        return
    (total, avg, std, s0) = DictUtil.statis(vs)
    # The tiny epsilon keeps avg/std defined when all values are identical.
    sys.stderr.write(
        "sum:%.4f    loss:%d,%.2f    avg:%.2f%%    std:%.4f    avg/std:%.4f\n"
        % (total, s0, float(s0) / len(vs), avg * 100, std,
           avg / (std + 1e-64)))
예제 #32
0
파일: regCount.py 프로젝트: wgzhao/scripts
def distribute():
	vs={}
	ms={}
	cs={}
	for line in sys.stdin:
		cols=line.strip().split("\t")
		if len(cols) < column.APP_LOG_COLUMNS:
			continue
		version=cols[column.VERSION_CID]
		channel=cols[column.MEDIA_CID]
		method=cols[column.METHOD_CID]
		uid=column.uid(cols)
		if uid == "":
			uid=None
		if method in regMs and uid == None:
			DictUtil.addOne(vs,version)
			DictUtil.addOne(ms,method)
			DictUtil.addOne(cs,channel)
	for v in vs:
		print "%s\t%d"%(v,vs[v])
	for m in ms:
		print "%s\t%d"%(m,ms[m])
	for c in cs:
		print "%s\t%d"%(c,cs[c])
예제 #33
0
def distribute():
    vs = {}
    ms = {}
    cs = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < column.APP_LOG_COLUMNS:
            continue
        version = cols[column.VERSION_CID]
        channel = cols[column.MEDIA_CID]
        method = cols[column.METHOD_CID]
        uid = column.uid(cols)
        if uid == "":
            uid = None
        if method in regMs and uid == None:
            DictUtil.addOne(vs, version)
            DictUtil.addOne(ms, method)
            DictUtil.addOne(cs, channel)
    for v in vs:
        print "%s\t%d" % (v, vs[v])
    for m in ms:
        print "%s\t%d" % (m, ms[m])
    for c in cs:
        print "%s\t%d" % (c, cs[c])
예제 #34
0
	def merge(self,ck):
		"""Accumulate the click record ``ck`` into this one.

		``search`` and ``hasHit`` are added; every counter dict is replaced
		by ``DictUtil.merge`` of the two; ``ms`` is taken from ``ck`` when
		unset here, otherwise merged through its own ``merge``.
		"""
		self.search+=ck.search
		self.hasHit+=ck.hasHit
		dictAttrs=(
			"weitShow","posHit","hit","show","posShow",
			"albumShow","albumHit","topicShow","topicHit",
			"foodShow","foodHit","hitCount",
			"hitTitleShow","hitTitleHit",
		)
		for attr in dictAttrs:
			setattr(self,attr,DictUtil.merge(getattr(self,attr),getattr(ck,attr)))
		if self.ms == None:
			self.ms=ck.ms
			return
		self.ms.merge(ck.ms)
예제 #35
0
def dictSum(d):
	"""Return ``d[SumMark]`` when that key exists, else ``DictUtil.sum(d)``."""
	if SumMark in d:
		return d[SumMark]
	return DictUtil.sum(d)
예제 #36
0
 def merge(self, dr):
     """Accumulate ``dr`` into this result.

     Adds ``dr.userNum`` and merges channels, hits, firstDivs, lastDivs, mc,
     mu and tu through ``DictUtil.merge``.
     """
     self.userNum += dr.userNum
     for attr in ("channels", "hits", "firstDivs", "lastDivs", "mc", "mu",
                  "tu"):
         DictUtil.merge(getattr(self, attr), getattr(dr, attr))
예제 #37
0
def readLine(f):
	"""Aggregate the "moreSearch" rows of ``f`` and print two reports.

	Rows are tab separated and addressed from the end: cols[-10] must be
	"moreSearch", cols[-9] is an integer count, cols[-3] and cols[-2] are
	dict literals keyed by category id and cols[-1] is an integer.  The first
	report has one row per key (cols[0]), the second one row per category
	name; rows whose key starts with "ck45" do not feed the second report.
	"""
	cates=DBCateName.readCateFile(open("/home/zhangzhonghui/log-mining/com/haodou/log-mining/util/cateidName.txt"))
	# counts[k] = [sum of cols[-9], sum of per-row max show, {name: total
	# from cols[-3]}, {name: total from cols[-2]}, sum of cols[-1]]
	counts={}
	# tc[name] = [total from cols[-3], sum of cols[-9] over rows showing it]
	tc={}
	for line in f:
		if line.find("moreSearch") < 0:
			continue
		cols=line.strip().split("\t")
		if len(cols) < 11 or cols[-10] != "moreSearch":
			continue
		#print cols[-10],cols[-9],cols[-11]
		k=cols[0]
		if True:
		#if k != "ck45_##total##":
			# NOTE(review): eval() executes whatever text the column holds;
			# only safe while the input file is trusted.
			ts=eval(cols[-3])
			# Debug output for rows whose column is a bare integer.
			if type(ts) == int:
				print line
				print cols[-4]
				print ts
			tshow=eval(cols[-2])
			rtn=int(cols[-1])
			if k not in counts:
				counts[k]=[0,0,{},{},0]
			counts[k][0]+=int(cols[-9])
			counts[k][4]+=rtn
			tns=counts[k][2]
			tnShow=counts[k][3]
			sum=0
			for t in ts:
				sum+=ts[t]
				# Category ids are translated to names through cates.
				tn=cates[int(t)]
				if tn not in tns:
					tns[tn]=0
				if tn not in tc:
					tc[tn]=[0,0]
				if not k.startswith("ck45"):
					tc[tn][0]+=ts[t]
				tns[tn]+=ts[t]
			# showNum ends up as the largest value in tshow for this row.
			showNum=0
			for t in tshow:
				if tshow[t] > showNum:
					showNum=tshow[t]
				tn=cates[int(t)]
				if tn not in tc:
					tc[tn]=[0,0]
				if not k.startswith("ck45"):
					tc[tn][1]+=int(cols[-9])
				if tn not in tnShow:
					tnShow[tn]=0
				tnShow[tn]+=tshow[t]
			counts[k][1]+=showNum
			#print "%s\t%s\t%s\t%s\t%.4f"%(cols[0],cols[32],cols[33],DictUtil.dictStr(tns),sum/(int(cols[32])+1e-32))

	# Report 1: one row per key; keys without any show total are omitted.
	for k in counts:
		(c,hc,ts,tshow,rtn)=counts[k]
		sum=0
		for tn in ts:
			sum+=ts[tn]
		ssum=0
		for tn in tshow:
			ssum+=tshow[tn]
		if ssum <= 0:
			continue
		print "%s\t%d\t%d\t%.4f\t%d\t%.4f\t%d\t%.4f\t%s\t%s"%(k,sum,c,sum/(c+1e-2),hc,sum/(hc+1e-2),rtn,rtn/(c+1e-12),DictUtil.dictStr(ts),DictUtil.dictStr(tshow))

	# Report 2: one row per category name with the ratio of its two totals.
	for t in tc:
		(v,s)=tc[t]
		print "%s\t%d\t%d\t%.4f"%(t,v,s,v/float(s+1e-2))
예제 #38
0
 def addChannel(self, user):
     """Count one more user and register the user's channel.

     The channel is ``us[user.uuid]`` when the uuid is set and present in the
     module-level ``us``, otherwise the empty string; it is passed to
     ``DictUtil.addOne`` on ``self.channels``.
     """
     self.userNum += 1
     uuid = user.uuid
     known = uuid != None and uuid in us
     media = us[uuid] if known else ""
     DictUtil.addOne(self.channels, media)
예제 #39
0
	def addHitCount(self,hc):
		"""Pass ``hc`` to ``DictUtil.addOne`` on ``self.hitCount``."""
		tally=self.hitCount
		DictUtil.addOne(tally,hc)
예제 #40
0
파일: analysis.py 프로젝트: wgzhao/scripts
	def addChannel(self,user):
		"""Count one more user and register the user's channel.

		The channel is ``us[user.uuid]`` when the uuid is set and present in
		the module-level ``us``, otherwise the empty string; it is passed to
		``DictUtil.addOne`` on ``self.channels``.
		"""
		self.userNum+=1
		uuid=user.uuid
		known=uuid != None and uuid in us
		media=us[uuid] if known else ""
		DictUtil.addOne(self.channels,media)
예제 #41
0
	def addHitTitleHit(self,title,type,keyword):
		"""Pass ``hitTitle(title, keyword) + type`` to ``DictUtil.addOne`` on ``self.hitTitleHit``."""
		key=hitTitle(title,keyword)+type
		DictUtil.addOne(self.hitTitleHit,key)
예제 #42
0
파일: seqHit.py 프로젝트: rainly/scripts-1
 def merge(self, other):
     """Accumulate ``other`` into this result.

     Adds ``other.num``; pages, sons, gsons and wrongs go through
     ``DictUtil.merge`` while sonTypes and gsonTypes go through
     ``DictUtil.merge2``.
     """
     self.num += other.num
     steps = (
         (DictUtil.merge, "pages"),
         (DictUtil.merge, "sons"),
         (DictUtil.merge2, "sonTypes"),
         (DictUtil.merge, "gsons"),
         (DictUtil.merge2, "gsonTypes"),
         (DictUtil.merge, "wrongs"),
     )
     for mergeFn, attr in steps:
         mergeFn(getattr(self, attr), getattr(other, attr))
예제 #43
0
	def addAlbumHit(self,album):
		"""Pass ``album`` to ``DictUtil.addOne`` on ``self.albumHit``."""
		tally=self.albumHit
		DictUtil.addOne(tally,album)
예제 #44
0
	def addFoodHit(self,foodId):
		"""Pass ``foodId`` to ``DictUtil.addOne`` on ``self.foodHit``."""
		tally=self.foodHit
		DictUtil.addOne(tally,foodId)
예제 #45
0
파일: cardClick.py 프로젝트: wgzhao/scripts
	def addSearch(self,keyword,rid,pos):
		"""Register one shown result in the keyword, position and rid show dicts."""
		shown=((self.keywordShow,keyword),(self.posShow,pos),(self.ridShow,rid))
		for tally,key in shown:
			DictUtil.addOne(tally,key)
예제 #46
0
	def addTopicHit(self,topic):
		"""Pass ``topic`` to ``DictUtil.addOne`` on ``self.topicHit``."""
		tally=self.topicHit
		DictUtil.addOne(tally,topic)
예제 #47
0
def combine(f,conf,start=0,end=10000):
	"""Replay a dated signal file and report monthly and yearly changes.

	Each line of ``f`` is tab separated: a ``YYYY-MM-...`` day, a float and a
	state string.  Lines with fewer than three columns or with a year outside
	[start, end] are skipped.  The (value, factor) pairs of one day are
	collected and handed to ``output(day, es, curve, conf)``.  Whenever the
	month or the year changes, the relative change of ``curve.now`` since the
	previous boundary is recorded.  All reporting goes to stderr.
	"""
	lastDay=""
	curve=loss.Loss(1.0)
	firstMonth=-1
	lastMonth=0
	lastValue=1.0
	vs=[]
	yvs=[]
	lastYear=0
	lastYearValue=1.0
	es=[]
	state=stepBack.State()
	for line in f:
		cols=line.strip().split("\t")
		if len(cols) < 3:
			continue
		day=cols[0]
		year=int(cols[0].split("-")[0])
		if year < start or year > end:
			continue
		# Months are tracked as YYYYMM integers.
		month=int(cols[0].split("-")[1])
		month=year*100+month
		if firstMonth <= 0:
			firstMonth=month
		if lastMonth <= 0:
			lastMonth=month
			lastYear=year
		if lastMonth!=month:
			# Month boundary: record the change since the last boundary.
			rate=curve.now/lastValue-1.0
			sys.stderr.write("%d-%d\t%.2f%%\n"%(lastMonth/100,lastMonth%100,rate*100))
			vs.append(rate)
			lastMonth=month
			lastValue=curve.now
		if lastYear != year:
			# Year boundary: same bookkeeping on the yearly series.
			rate=curve.now/lastYearValue-1.0
			yvs.append((lastYear,rate))
			lastYear=year
			lastYearValue=curve.now
		if lastDay == "":
			lastDay=day
		if lastDay != day:
			# Day boundary: flush the entries collected for the previous day.
			output(lastDay,es,curve,conf)
			lastDay=day
			es=[]
		e=float(cols[1])
		state.read(cols[2])
		# The factor is scaled up or down by each of the two state flags.
		if state.IsBull:
			fac=TrendFac
		else:
			fac=1.0/TrendFac
		if state.LastClose5:
			fac*=Close5Fac
		else:
			fac/=Close5Fac
		es.append((e,fac))
	if lastDay != "":
		# Close the last month, year and day.
		rate=curve.now/lastValue-1.0
		sys.stderr.write("%d-%d\t%.2f%%\n"%(lastMonth/100,lastMonth%100,rate*100))
		vs.append(rate)
		rate=curve.now/lastYearValue-1.0
		yvs.append((lastYear,rate))
		output(lastDay,es,curve,conf)
		es=[]
	sys.stderr.write("firstMonth:%d\tlastMonth:%d\n"%(firstMonth,lastMonth))
	curve.setTime(dayDiv(firstMonth,lastMonth))
	sys.stderr.write(curve.confStr(conf)+"\n")
	(sum,avg,std,s0)=DictUtil.statis(vs)
	mrate=math.pow(curve.now,1.0/monthDiv(firstMonth,lastMonth))-1.0
	sys.stderr.write("月均统计	sum:%.4f	loss:%d,%.2f	avg:%.2f%%,%.2f%%	std:%.4f	avg/std:%.4f\n"%(sum,s0,float(s0)/len(vs),avg*100,mrate*100,std,mrate/std))
	rvs=[]
	for lastYear,rate in yvs:
		rvs.append(rate)
		sys.stderr.write("%d\t%.2f%%\n"%(lastYear,rate*100))
	(sum,avg,std,s0)=DictUtil.statis(rvs)
	yrate=curve.rate
	# The yearly loss share is relative to the number of yearly samples
	# (rvs), not to the number of monthly samples.
	sys.stderr.write("年均统计  sum:%.4f    loss:%d,%.2f	avg:%.2f%%,%.2f%%    std:%.4f    avg/std:%.4f\n"%(sum,s0,float(s0)/len(rvs),avg*100,yrate*100,std,yrate/std))
예제 #48
0
파일: posHit.py 프로젝트: wgzhao/scripts
def addAction(action,actions,ret):
	"""Fold one action into ``ret``.

	Bumps ``ret.num``, passes the length of ``action.pull`` to
	``DictUtil.addOne`` on ``ret.pages``, feeds the son types through
	``addTs`` and passes its result to ``DictUtil.addOne`` on ``ret.sonNums``.
	"""
	ret.num+=1
	DictUtil.addOne(ret.pages,len(action.pull))
	# Only the type dict (first element) of the result is used.
	sonTypeCounts=getSonTypes(action.id,actions)[0]
	DictUtil.addOne(ret.sonNums,addTs(sonTypeCounts,ret.sons))
예제 #49
0
파일: cardClick.py 프로젝트: wgzhao/scripts
	def addHit(self,keyword,rid,pos):
		"""Register one clicked result in the keyword, position and rid hit dicts."""
		clicked=((self.keywordHit,keyword),(self.posHit,pos),(self.ridHit,rid))
		for tally,key in clicked:
			DictUtil.addOne(tally,key)
예제 #50
0
파일: analysis.py 프로젝트: wgzhao/scripts
	def merge(self,dr):
		"""Accumulate ``dr`` into this result.

		Adds ``dr.userNum`` and merges channels, hits, firstDivs, lastDivs,
		mc, mu and tu through ``DictUtil.merge``.
		"""
		self.userNum+=dr.userNum
		for attr in ("channels","hits","firstDivs","lastDivs","mc","mu","tu"):
			DictUtil.merge(getattr(self,attr),getattr(dr,attr))
예제 #51
0
def addAction(action, actions, ret):
    """Fold one action into ``ret``.

    Bumps ``ret.num``, passes the length of ``action.pull`` to
    ``DictUtil.addOne`` on ``ret.pages``, feeds the son types through
    ``addTs`` and passes its result to ``DictUtil.addOne`` on ``ret.sonNums``.
    """
    ret.num += 1
    DictUtil.addOne(ret.pages, len(action.pull))
    # Only the type dict (first element) of the result is used.
    sonTypeCounts = getSonTypes(action.id, actions)[0]
    DictUtil.addOne(ret.sonNums, addTs(sonTypeCounts, ret.sons))