Beispiel #1
0
def outputa(lastMethod,paras,paraUs):
	allu={}
	for p in paras:
		print lastMethod+"-"+p+"\t"+str(dictInfo.info(paraUs[p]))
		for u in paraUs[p]:
			if u not in allu:
				allu[u]=paraUs[p][u]
			else:
				allu[u]+=paraUs[p][u]
		paraUs[p]={}
	print lastMethod+"\t"+str(dictInfo.info(allu))
Beispiel #2
0
def outputa(lastMethod, paras, paraUs):
    allu = {}
    for p in paras:
        print lastMethod + "-" + p + "\t" + str(dictInfo.info(paraUs[p]))
        for u in paraUs[p]:
            if u not in allu:
                allu[u] = paraUs[p][u]
            else:
                allu[u] += paraUs[p][u]
        paraUs[p] = {}
    print lastMethod + "\t" + str(dictInfo.info(allu))
Beispiel #3
0
def count(f):
	lineNum=0
	uNum=0
	us={}
	nus={}
	uids={}
	for line in f:
		cols=line.strip().split("\t")
		if len(cols) < APP_LOG_COLUMNS4:
			continue
		lineNum+=1
		u=cols[USER_CID]
		uuid=cols[UUID_ID]
		if uuid not in uids:
			uids[uuid]=1
		else:
			uids[uuid]+=1
		if PositiveNumP.match(u):
			if u not in us:
				us[u]={}
			uNum+=1
			if uuid not in us[u]:
				us[u][uuid]=1
			else:
				us[u][uuid]+=1
		else:
			if uuid not in nus:
				nus[uuid]=1
			else:
				nus[uuid]+=1
	sum=0
	for u in us:
		sum+=len(us[u])
		if len(us[u]) >= 3:
			print u,us[u]
		v=0
		for uuid in us[u]:
			v+=us[u][uuid]
		us[u]=v
	print "注册用户请求日志行数统计信息"
	print dictInfo.info(us)
	print "非注册用户请求日志行数统计信息"
	print dictInfo.info(nus)
	print "注册用户日志行数\t总日志行数\t注册用户占比"
	print "%d\t%d\t%.4f"%(uNum,lineNum,float(uNum)/lineNum)
	print "总uuid数\t注册uuid+非注册uuid\t注册与非注册uuid重叠比例"
	print "%d\t%d\t%.4f"%(len(uids),len(nus)+sum,float(len(nus)+sum)/len(uids)-1.0)
	print "注册用户数\t注册uuid数\t注册用户多个uuid比例"
	print "%d\t%d\t%.4f"%(len(us),sum,float(sum)/len(us)-1.0)
	print "注册用户数\t注册用户日志行数\t注册用户平均行数"
	print "%d\t%d\t%.4f"%(len(us),uNum,float(uNum)/len(us))
	print "非注册uuid数\t非注册用户日志行数\t非注册用户平均行数"
	print "%d\t%d\t%.4f"%(len(nus),lineNum-uNum,float(lineNum-uNum)/len(nus))
Beispiel #4
0
def methodDistribute():
    ms = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < APP_LOG_COLUMNS:
            continue
        method = cols[METHOD_CID]
        if method not in ms:
            ms[method] = 1
        else:
            ms[method] += 1
    for m in ms:
        print m, ms[m]
    print dictInfo.info(ms)
Beispiel #5
0
def methodDistribute():
	ms={}
	for line in sys.stdin:
		cols=line.strip().split("\t")
		if len(cols) < APP_LOG_COLUMNS:
			continue
		method=cols[METHOD_CID]
		if method not in ms:
			ms[method]=1
		else:
			ms[method]+=1
	for m in ms:
		print m,ms[m]
	print dictInfo.info(ms)
Beispiel #6
0
def ipDis():
    ips = {}
    lastIp = {}
    wrong = 0
    total = 0
    ws = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < APP_LOG_COLUMNS:
            continue
        if line.find("mobiledevice.initandroiddevice") > 0:
            continue
        u = ActionUser(cols)
        if u == None or u == "":
            continue
        ip = cols[IP_CID]
        if ip == "":
            continue
        if ip not in ips:
            ips[ip] = u
        else:
            v = ips[ip]
            if type(v) == dict:
                if u not in v:
                    v[u] = 1
                else:
                    if u != lastIp[ip]:
                        wrong += 1
                        if ip not in ws:
                            ws[ip] = 1
                        else:
                            ws[ip] += 1
            else:
                if v != u:
                    ips[ip] = {}
                    ips[ip][u] = 1
                    ips[ip][v] = 1
        lastIp[ip] = u
        total += 1
    for ip in ips:
        v = ips[ip]
        if type(v) == dict:
            ips[ip] = len(v)
        else:
            ips[ip] = 1
    print dictInfo.info(ws)
    print "total", total
    print "wrong", wrong
    print dictInfo.info(ips)
Beispiel #7
0
def ipDis():
	ips={}
	lastIp={}
	wrong=0
	total=0
	ws={}
	for line in sys.stdin:
		cols=line.strip().split("\t")
		if len(cols) < APP_LOG_COLUMNS:
			continue
		if line.find("mobiledevice.initandroiddevice") > 0:
			continue
		u=ActionUser(cols)
		if u == None or u=="":
			continue
		ip=cols[IP_CID]
		if ip == "":
			continue
		if ip not in ips:
			ips[ip]=u
		else:
			v=ips[ip]
			if type(v) == dict:
				if u not in v:
					v[u]=1
				else:
					if u != lastIp[ip]:
						wrong+=1
						if ip not in ws:
							ws[ip]=1
						else:
							ws[ip]+=1
			else:
				if v != u:
					ips[ip]={}
					ips[ip][u]=1
					ips[ip][v]=1
		lastIp[ip]=u
		total+=1
	for ip in ips:
		v=ips[ip]
		if type(v) == dict:
			ips[ip]=len(v)
		else:
			ips[ip]=1
	print dictInfo.info(ws)	
	print "total",total
	print "wrong",wrong
	print dictInfo.info(ips)
Beispiel #8
0
def join():
    ds = {}
    ds["divTime"] = 0
    ds["divNum"] = 0
    ds["wrongTime"] = 0
    ds["wrongNum"] = 0
    ds["not"] = 0
    ds["viewNoReceive"] = 0
    divs = {}
    lastU = ""
    ms = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < 5:
            continue
        u = cols[0]
        if lastU == "":
            lastU = u
        if lastU != u:
            output(lastU, ms, ds, divs)
            lastU = u
            ts = {}
            ms = {}
        if cols[4].startswith("4"):
            continue
        method = cols[1]
        message = cols[2]
        time = int(cols[3])
        if message not in ms:
            ms[message] = {}
        ms[message][method] = time
    if lastU != "":
        output(lastU, ms, ds, divs)
    wrongTime = ds["wrongTime"]
    wrongNum = ds["wrongNum"]
    divTime = ds["divTime"]
    divNum = ds["divNum"]
    print "view but not received:%d\n" % (ds["viewNoReceive"])
    print "not:%d\n" % (ds["not"])
    print "wrong:%d\t%d\t%f\n" % (wrongTime, wrongNum, float(wrongTime) /
                                  (wrongNum + 1e-16))
    print "div:%d\t%d\t%f\n" % (divTime, divNum, float(divTime) /
                                (divNum + 1e-16))
    print dictInfo.info(divs)
    print ccinfo.info(divs)
Beispiel #9
0
def join():
	ds={}
	ds["divTime"]=0
	ds["divNum"]=0
	ds["wrongTime"]=0
	ds["wrongNum"]=0
	ds["not"]=0
	ds["viewNoReceive"]=0
	divs={}
	lastU=""
	ms={}
	for line in sys.stdin:
		cols=line.strip().split("\t")
		if len(cols) < 5:
			continue
		u=cols[0]
		if lastU == "":
			lastU = u
		if lastU != u:
			output(lastU,ms,ds,divs)
			lastU=u
			ts={}
			ms={}
		if cols[4].startswith("4"):
			continue
		method=cols[1]
		message=cols[2]
		time=int(cols[3])
		if message not in ms:
			ms[message]={}
		ms[message][method]=time
	if lastU != "":
		output(lastU,ms,ds,divs)
	wrongTime=ds["wrongTime"]
	wrongNum=ds["wrongNum"]
	divTime=ds["divTime"]
	divNum=ds["divNum"]
	print "view but not received:%d\n"%(ds["viewNoReceive"])
	print "not:%d\n"%(ds["not"])
	print "wrong:%d\t%d\t%f\n"%(wrongTime,wrongNum,float(wrongTime)/(wrongNum+1e-16))
	print "div:%d\t%d\t%f\n"%(divTime,divNum,float(divTime)/(divNum+1e-16))
	print dictInfo.info(divs)
	print ccinfo.info(divs)
Beispiel #10
0
def appleUserId():
    right = 0
    wrong = 0
    did = ""
    pds = {}
    ds = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) <= APP_LOG_COLUMNS:
            continue
        did = cols[UUID_ID]
        if did == "haodou" or did == "" or did == "null" or did == "haodoua1000033d709ec6":
            continue
        ip = cols[IP_CID]
        if cols[MEDIA_CID] == "appstore":
            uid = uuid(cols)
            if uid == None:
                continue
            if PositiveNumP.match(uid):
                if PositiveNumP.match(cols[USER_CID]):
                    right += 1
                    print line
                else:
                    wrong += 1
            if did not in pds:
                pds[did] = {}
            if ip not in pds[did]:
                pds[did][ip] = 1
        else:
            if did not in ds:
                ds[did] = {}
            if ip not in ds[did]:
                ds[did][ip] = 1
    for did in pds:
        pds[did] = len(pds[did])
    for did in ds:
        ds[did] = len(ds[did])
    import dictInfo
    print dictInfo.info(ds)
    print dictInfo.info(pds)
    print "right", right
    print "wrong", wrong
Beispiel #11
0
def appleUserId():
	right=0
	wrong=0
	did=""
	pds={}
	ds={}
	for line in sys.stdin:
		cols=line.strip().split("\t")
		if len(cols) <= APP_LOG_COLUMNS:
			continue
		did=cols[UUID_ID]
		if did == "haodou" or did == "" or did == "null" or did == "haodoua1000033d709ec6":
			continue
		ip=cols[IP_CID]
		if cols[MEDIA_CID] == "appstore":
			uid=uuid(cols)
			if uid == None:
				continue
			if PositiveNumP.match(uid):
				if PositiveNumP.match(cols[USER_CID]):
					right+=1
					print line
				else:
					wrong+=1
			if did not in pds:
				pds[did]={}
			if ip not in pds[did]:
				pds[did][ip]=1
		else:
			if did not in ds:
				ds[did]={}
			if ip not in ds[did]:
				ds[did][ip]=1
	for did in pds:
		pds[did]=len(pds[did])
	for did in ds:
		ds[did]=len(ds[did])
	import dictInfo
	print dictInfo.info(ds)
	print dictInfo.info(pds)
	print "right",right
	print "wrong",wrong
Beispiel #12
0
def tagidDis():
    rkt = {}
    kt = {}
    t = {}
    knt = {}
    ks = {}
    names = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < METHOD_CID + 1:
            continue
        v = cols[VERSION_CID]
        if v != "4.0" and v != "v4.0":
            continue
        method = cols[METHOD_CID]
        if not method.startswith("search.getlist"):
            continue
        para = cols[PARA_ID]
        tid = getValue(para, "tagid")
        kw = getValue(para, "keyword")
        rqid = getValue(para, "return_request_id")
        if tid == "null":
            if kw != None and kw != "":
                if kw not in knt:
                    knt[kw] = 1
                else:
                    knt[kw] += 1
        elif tid == "" or tid == None:
            if kw not in ks:
                ks[kw] = 1
            else:
                ks[kw] += 1
        if tid != None and tid != "" and tid != "null":
            if kw != None and kw != "":
                if rqid != None and rqid != "":
                    if tid not in rkt:
                        rkt[tid] = 1
                    else:
                        rkt[tid] += 1
                if tid not in kt:
                    kt[tid] = 1
                else:
                    kt[tid] += 1
                if tid not in names:
                    names[tid] = kw
            else:
                if tid not in t:
                    t[tid] = 1
                else:
                    t[tid] += 1
    print "len of rkt", len(rkt)
    print "len of kt", len(kt)
    print "len of t", len(t)
    for tid in rkt:
        if tid not in names:
            names[tid] = ""
        print tid, names[tid], rkt[tid]
    for tid in t:
        if tid in names:
            print tid, names[tid], t[tid]
        else:
            print tid, t[tid]
    for tid in kt:
        print "kt", tid, names[tid], kt[tid]
    info = dictInfo.info(knt)
    print info
    for k in info["top10"]:
        print k[0], k[1]
    info = dictInfo.info(ks)
    print info
    for k in info["top10"]:
        print k[0], k[1]
Beispiel #13
0
import dictInfo

if __name__ == "__main__":
    d = {2: 3, "ty": 4, 10: 4, 12: 4, 99: 100}
    print dictInfo.info(d)
    print dictInfo.topn(d, 3)
Beispiel #14
0
def tagidDis():
	rkt={}
	kt={}
	t={}
	knt={}
	ks={}
	names={}
	for line in sys.stdin:
		cols=line.strip().split("\t")
		if len(cols) < METHOD_CID+1:
			continue
		v=cols[VERSION_CID]
		if v != "4.0" and v != "v4.0":
			continue
		method=cols[METHOD_CID]
		if not method.startswith("search.getlist"):
			continue
		para=cols[PARA_ID]
		tid=getValue(para,"tagid")
		kw=getValue(para,"keyword")
		rqid=getValue(para,"return_request_id")
		if tid == "null":
			if kw != None and kw != "":
				if kw not in knt:
					knt[kw]=1
				else:
					knt[kw]+=1
		elif tid == "" or tid == None:
			if kw not in ks:
				ks[kw]=1
			else:
				ks[kw]+=1
		if tid != None and tid != "" and tid != "null":
			if kw != None and kw != "":
				if rqid != None and rqid != "":
					if tid not in rkt:
						rkt[tid]=1
					else:
						rkt[tid]+=1
				if tid not in kt:
					kt[tid]=1
				else:
					kt[tid]+=1
				if tid not in names:
					names[tid]=kw
			else:
				if tid not in t:
					t[tid]=1
				else:
					t[tid]+=1
	print "len of rkt",len(rkt)
	print "len of kt",len(kt)
	print "len of t",len(t)
	for tid in rkt:
		if tid not in names:
			names[tid]=""
		print tid,names[tid],rkt[tid]
	for tid in t:
		if tid in names:
			print tid,names[tid],t[tid]
		else:
			print tid,t[tid]
	for tid in kt:
		print "kt",tid,names[tid],kt[tid]
	info=dictInfo.info(knt)
	print info
	for k in info["top10"]:
		print k[0],k[1]
	info=dictInfo.info(ks)
	print info
	for k in info["top10"]:
		print k[0],k[1]
Beispiel #15
0
import dictInfo

if __name__=="__main__":
	d={2:3,"ty":4,10:4,12:4,99:100}
	print dictInfo.info(d)
	print dictInfo.topn(d,3)

Beispiel #16
0
def output(lastm, us, vs):
    s = lastm
    info = dictInfo.info(us)
    print lastm + "\t" + str(info)
    info = dictInfo.info(vs)
    print "v_" + lastm + "\t" + str(info)
Beispiel #17
0
def output(lastm,us,vs):
	s=lastm
	info=dictInfo.info(us)
	print lastm+"\t"+str(info)
	info=dictInfo.info(vs)
	print "v_"+lastm+"\t"+str(info)