Exemple #1
0
def count():
    """Summarise the IP <-> UUID relationship of the log read from stdin.

    Every stdin line is stripped and split on tabs; rows rejected by
    ``column.valid`` are skipped.  For the remaining rows the distinct
    (ip, uuid) pairs are collected and two tab-separated lines are printed:

    * distinct IP count, distinct (ip, uuid) pair count, average number of
      UUIDs per IP;
    * distinct UUID count, distinct (uuid, ip) pair count, average number of
      IPs per UUID.

    When no valid row is seen the averages are printed as 0.0000 instead of
    raising ``ZeroDivisionError``.
    """
    uuids_by_ip = {}
    ips_by_uuid = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if not column.valid(cols):
            continue
        uuid = column.uuid(cols)
        ip = cols[column.IP_CID]
        uuids_by_ip.setdefault(ip, set()).add(uuid)
        ips_by_uuid.setdefault(uuid, set()).add(ip)

    def _report(template, groups):
        # Summing the group sizes yields the number of distinct pairs.
        pairs = sum(len(members) for members in groups.values())
        average = float(pairs) / len(groups) if groups else 0.0
        print(template % (len(groups), pairs, average))

    # The output keys (including the "ipPerUuuid" spelling) are runtime text
    # and are reproduced unchanged.
    _report("ipNum\t%d\tipUuidNum\t%d\tuuidPerIp\t%.4f", uuids_by_ip)
    _report("uuidNum\t%d\tuuidIpNum\t%d\tipPerUuuid\t%.4f", ips_by_uuid)
Exemple #2
0
def parseLine(line, type="appLog"):
    """Turn one raw log line into an ``(item, user)`` pair.

    Args:
        line: one tab-separated log line.
        type: log flavour, ``"userSort"`` or ``"appLog"`` (the default).
            The name shadows the builtin but is kept so that keyword callers
            continue to work.

    Returns:
        ``(item, user)`` for an accepted line, or ``(None, None)`` when the
        line is rejected or ``type`` is not one of the two known flavours.
    """
    cols = line.strip().split("\t")

    if type == "userSort":
        action, user = sequence.getActionUser(cols, 0)
        if action is None or user is None:
            return (None, None)
        return (hitItemName.getPosName(action), user.name())

    if type == "appLog":
        if not column.valid(cols):
            return (None, None)
        method = cols[column.METHOD_CID]
        para = cols[column.PARA_ID]
        version = column.intVersion(cols[column.VERSION_CID])
        action_item = getActionItem.getActionItem(method, para, version)
        name = hitItemName.getMethodName(method, para)
        # Requests whose "offset" parameter is the string "0" get a
        # dedicated name suffix.
        if column.getValue(para, "offset") == "0":
            name += hitItemName.SplitSign + "0"
        return (hitItemName.packItem(name, action_item), column.uuid(cols))

    return (None, None)
Exemple #3
0
def t2(in_path="/home/zhangzhonghui/data/In.txt"):
    """Compare per-method hit counts of listed users against all other users.

    The third whitespace-separated column of each line of ``in_path`` is
    loaded as the set of listed users.  Valid stdin rows (per
    ``column.valid``) are then counted per method, separately for UUIDs that
    are listed and UUIDs that are not.  For every method hit more than 100
    times by listed users one line is printed: method, listed hits, other
    hits, and the per-user hit rate of listed users divided by the per-user
    hit rate of the others.  A final line prints both population sizes.

    Args:
        in_path: file holding the listed users; defaults to the path that
            used to be hard-coded.
    """
    listed = set()
    with open(in_path) as handle:  # closed deterministically
        for line in handle:
            cols = line.strip().split()
            if len(cols) > 2:  # skip blank/short lines instead of IndexError
                listed.add(cols[2])

    others = set()
    listed_hits = {}
    other_hits = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if not column.valid(cols):
            continue
        uuid = column.uuid(cols)
        method = cols[column.METHOD_CID]
        if uuid in listed:
            listed_hits[method] = listed_hits.get(method, 0) + 1
        else:
            others.add(uuid)
            other_hits[method] = other_hits.get(method, 0) + 1

    for method, hits in listed_hits.items():
        if hits <= 100:
            continue
        other = other_hits.get(method, 0)
        # The 1e-12 term keeps the division defined when the other users
        # never called this method.
        ratio = (float(hits) * len(others) / float(other + 1e-12)
                 / float(len(listed)))
        print("%s %s %s %s" % (method, hits, other, ratio))

    # NOTE(review): the original statement was garbled in this copy
    # ('"notIn user:"******"other user:", len(nus)'); the first value is
    # assumed to have been len(us) -- confirm against the real source.
    print("notIn user: %d other user: %d" % (len(listed), len(others)))
Exemple #4
0
def t2(in_path="/home/zhangzhonghui/data/In.txt"):
    """Report methods favoured by the users named in ``in_path``.

    Column three (whitespace separated) of every line in ``in_path`` names a
    member.  Each valid stdin row (``column.valid``) adds one hit for its
    method, either to the member tally or, when the row's UUID is not a
    member, to the outsider tally.  Methods with more than 100 member hits
    are printed as "method member_hits outsider_hits ratio", where ratio is
    member hits per member divided by outsider hits per outsider.  The last
    line printed holds the two population sizes.

    Args:
        in_path: member list file; the default is the formerly hard-coded
            location.
    """
    with open(in_path) as src:  # the original never closed this file
        rows = (raw.split() for raw in src)
        # Lines with fewer than three columns used to raise IndexError.
        members = set(parts[2] for parts in rows if len(parts) > 2)

    outsiders = set()
    member_hits = {}
    outsider_hits = {}
    for raw in sys.stdin:
        cols = raw.strip().split("\t")
        if not column.valid(cols):
            continue
        uuid = column.uuid(cols)
        method = cols[column.METHOD_CID]
        if uuid not in members:
            outsiders.add(uuid)
            tally = outsider_hits
        else:
            tally = member_hits
        tally[method] = tally.get(method, 0) + 1

    member_total = float(len(members))
    for method in member_hits:
        inside = member_hits[method]
        if inside > 100:
            outside = outsider_hits.get(method, 0)
            # 1e-12 avoids a zero denominator for methods outsiders never hit.
            ratio = float(inside) * len(outsiders) / float(outside + 1e-12)
            ratio = ratio / member_total
            print("%s %s %s %s" % (method, inside, outside, ratio))

    # NOTE(review): this print was corrupted in the source
    # ('"notIn user:"******"other user:",len(nus)'); len(us) is assumed for
    # the first value -- verify before relying on the label.
    print("notIn user: %d other user: %d" % (len(members), len(outsiders)))
Exemple #5
0
def countSearch(file):
    """Print one tag per qualifying ``search.getlist`` row read from stdin.

    ``readCates(file)`` supplies a keyword -> tag mapping.  Only stdin lines
    containing the text "search.getlist" and accepted by ``column.valid``
    are considered; rows whose "scene" parameter is "t2" are ignored.  For a
    "t1" scene with a non-empty "tagid" the tag id itself is printed;
    otherwise the row's "keyword" is looked up in the mapping and the mapped
    value is printed when present.

    Args:
        file: passed straight through to ``readCates``.
    """
    tags = readCates(file)
    for raw in sys.stdin:
        # Cheap substring pre-filter before the line is split.
        if "search.getlist" not in raw:
            continue
        cols = raw.strip().split("\t")
        if not column.valid(cols):
            continue

        para = cols[column.PARA_ID]
        scene = column.getValue(para, "scene")
        if scene == "t2":
            continue

        tagid = column.getValue(para, "tagid")
        if scene == "t1" and tagid not in (None, ""):
            print(tagid)
            continue

        keyword = column.getValue(para, "keyword")
        if keyword is not None and keyword in tags:
            print(tags[keyword])
Exemple #6
0
def qid(f):
    """Print every recognised line of ``f`` prefixed with a grouping key.

    Lines are stripped, then handled by prefix:

    * lines starting with '{"status":' are printed as "request_id<TAB>line"
      when ``column.getQid2`` yields a non-empty id, and dropped otherwise;
    * lines starting with '{"remote_addr":"' are printed as
      "rand<TAB>ip<TAB>log_time<TAB>line" when ``mlog.getU`` yields a
      non-empty value, where rand is a random integer in 0..99;
    * any other line is treated as a tab-separated row, skipped unless
      ``column.valid`` accepts it, and printed keyed by its "request_id"
      parameter, or by a random integer in 0..99 when that is missing.

    Args:
        f: iterable of text lines.
    """
    for line in f:
        line = line.strip()
        if line.startswith('{"status":'):
            request_id = column.getQid2(line, "request_id")
            if request_id:
                print(request_id + "\t" + line)
        elif line.startswith('{"remote_addr":"'):
            ip = mlog.getU(line)
            log_time = mlog.value(line, '"log_time":')
            if ip:
                # `line` is already stripped, so no second strip is needed.
                print(str(random.randint(0, 99)) + "\t" + ip + "\t"
                      + log_time + "\t" + line)
        else:
            cols = line.split("\t")
            if not column.valid(cols):
                continue
            request_id = column.getValue(cols[column.PARA_ID], "request_id")
            if request_id:
                print(request_id + "\t" + line)
            else:
                print(str(random.randint(0, 99)) + "\t" + line)
Exemple #7
0
def appLogHit(aid):
    """Print per-app hit counts of ``info.getalbuminfo`` for one album.

    Reads tab-separated rows from stdin and keeps those that pass
    ``column.valid``, have method "info.getalbuminfo", an integer version of
    at least 400 and an "aid" parameter equal to ``aid``.  One
    "appid<TAB>count" line is printed per app id seen.

    Args:
        aid: album id to match against each row's "aid" parameter.
    """
    hits_by_app = {}
    for raw in sys.stdin:
        cols = raw.strip().split("\t")
        if not column.valid(cols):
            continue
        if cols[column.METHOD_CID] != "info.getalbuminfo":
            continue
        if column.intVersion(cols[column.VERSION_CID]) < 400:
            continue
        if column.getValue(cols[column.PARA_ID], "aid") != aid:
            continue
        app = cols[column.APPID_CID]
        hits_by_app[app] = hits_by_app.get(app, 0) + 1

    for app, hits in hits_by_app.items():
        print("%s\t%d" % (app, hits))
Exemple #8
0
def appLogHit(aid):
    """Tally album-info requests for ``aid`` by app id and print the tally.

    A stdin row counts when ``column.valid`` accepts it, its method column
    is "info.getalbuminfo", ``column.intVersion`` of its version column is
    400 or more, and its "aid" parameter equals ``aid``.  Output is one
    "appid<TAB>count" line for each distinct app id.

    Args:
        aid: the album id the rows must reference.
    """

    def _matches(cols):
        # Checks run in the same order as the guard clauses they replace.
        return (column.valid(cols)
                and cols[column.METHOD_CID] == "info.getalbuminfo"
                and column.intVersion(cols[column.VERSION_CID]) >= 400
                and column.getValue(cols[column.PARA_ID], "aid") == aid)

    tally = {}
    for record in sys.stdin:
        fields = record.strip().split("\t")
        if _matches(fields):
            appid = fields[column.APPID_CID]
            try:
                tally[appid] += 1
            except KeyError:
                tally[appid] = 1

    for appid in tally:
        print("%s\t%d" % (appid, tally[appid]))
Exemple #9
0
def count():
    """Print how many UUIDs share each IP, and how many IPs each UUID uses.

    Tab-separated rows are read from stdin; rows failing ``column.valid``
    are ignored.  Two tab-separated summary lines are printed: the first
    gives the distinct IP count, the distinct (ip, uuid) pair count and
    their quotient; the second gives the same figures from the UUID side.

    With no valid input both quotients are printed as 0.0000 rather than
    failing with ``ZeroDivisionError``.
    """
    ip_to_uuids = {}
    uuid_to_ips = {}
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if column.valid(fields):
            uuid = column.uuid(fields)
            ip = fields[column.IP_CID]
            ip_to_uuids.setdefault(ip, set()).add(uuid)
            uuid_to_ips.setdefault(uuid, set()).add(ip)

    ip_num = len(ip_to_uuids)
    ip_pairs = sum(len(group) for group in ip_to_uuids.values())
    uuid_per_ip = 0.0
    if ip_num:  # guard: empty input used to divide by zero here
        uuid_per_ip = float(ip_pairs) / ip_num
    print("ipNum\t%d\tipUuidNum\t%d\tuuidPerIp\t%.4f"
          % (ip_num, ip_pairs, uuid_per_ip))

    uuid_num = len(uuid_to_ips)
    uuid_pairs = sum(len(group) for group in uuid_to_ips.values())
    ip_per_uuid = 0.0
    if uuid_num:
        ip_per_uuid = float(uuid_pairs) / uuid_num
    # The "ipPerUuuid" key is runtime text and is emitted unchanged.
    print("uuidNum\t%d\tuuidIpNum\t%d\tipPerUuuid\t%.4f"
          % (uuid_num, uuid_pairs, ip_per_uuid))
Exemple #10
0
def qid(f):
    """Emit each usable line of ``f`` with a leading key column.

    Each line is stripped and classified by how it starts:

    * '{"status":' -- keyed by ``column.getQid2(line, "request_id")``;
      nothing is printed when that id is missing or empty.
    * '{"remote_addr":"' -- printed with a random 0..99 key followed by the
      value of ``mlog.getU`` and the "log_time" value from ``mlog.value``;
      nothing is printed when ``mlog.getU`` returns nothing.
    * anything else -- split on tabs and dropped unless ``column.valid``
      accepts it; keyed by its "request_id" parameter, falling back to a
      random 0..99 key.

    Args:
        f: iterable yielding text lines.
    """
    status_prefix = '{"status":'
    access_prefix = '{"remote_addr":"'

    for raw in f:
        text = raw.strip()

        if text.startswith(status_prefix):
            key = column.getQid2(text, "request_id")
            if key is not None and key != "":
                print("\t".join([key, text]))
            continue

        if text.startswith(access_prefix):
            ip = mlog.getU(text)
            stamp = mlog.value(text, '"log_time":')
            if ip is not None and ip != "":
                bucket = str(random.randint(0, 99))
                print("\t".join([bucket, ip, stamp, text]))
            continue

        cols = text.split("\t")
        if not column.valid(cols):
            continue
        key = column.getValue(cols[column.PARA_ID], "request_id")
        if key is None or key == "":
            # No request id available: fall back to a random 0..99 key.
            key = str(random.randint(0, 99))
        print("\t".join([key, text]))
Exemple #11
0
def map():
    """Map step: index stdin log rows by uid and by uuid.

    Rows that have exactly ``Reduce_Cols`` columns and whose second column
    is in ``types`` are echoed unchanged (stripped) and otherwise ignored.
    Every other row must pass ``column.valid``; for it, ``addItem2`` is
    called three times (peer id, channel, ip) under the key
    ``Uid_Fix + uid`` when a uid is present, and three more times under
    ``Uuid_Fix + uuid`` when a uuid is present.  After stdin is exhausted
    ``output`` is called once per collected key.
    """
    us = {}
    for raw in sys.stdin:
        cols = raw.strip().split("\t")
        # Accumulate past data: rows already in the reduced layout are
        # passed through as-is.
        if len(cols) == Reduce_Cols and cols[1] in types:
            print(raw.strip())
            continue
        if not column.valid(cols):
            continue

        uid = column.uid(cols)
        uuid = column.uuidOnly(cols)
        ip = cols[column.IP_CID]
        time = cols[column.TIME_CID]
        channel = cols[column.MEDIA_CID]

        # Each identity is recorded with the *other* identity as its peer.
        for owner, prefix, peer in ((uid, Uid_Fix, uuid),
                                    (uuid, Uuid_Fix, uid)):
            if owner is None or len(owner) == 0:
                continue
            addItem2(U_Type, peer, time, us, prefix + owner)
            addItem2(Channel_Type, channel, time, us, prefix + owner)
            addItem2(Ip_Type, ip, time, us, prefix + owner)

    for key in us:
        output(key, us[key])
Exemple #12
0
def map():
    """Collect per-user items from stdin rows and hand them to ``output``.

    A row with ``Reduce_Cols`` columns whose second column belongs to
    ``types`` is printed back (stripped) and skipped.  Remaining rows are
    dropped unless ``column.valid`` accepts them.  For an accepted row the
    uid, uuid, ip, time and channel are read; a non-empty uid registers the
    uuid, channel and ip under ``Uid_Fix + uid``, and a non-empty uuid
    registers the uid, channel and ip under ``Uuid_Fix + uuid``, all via
    ``addItem2``.  Finally ``output`` is invoked for every collected key.
    """
    collected = {}

    def _register(key, peer, channel, ip, time):
        # Same three addItem2 calls, in the same order, for either identity.
        addItem2(U_Type, peer, time, collected, key)
        addItem2(Channel_Type, channel, time, collected, key)
        addItem2(Ip_Type, ip, time, collected, key)

    for line in sys.stdin:
        stripped = line.strip()
        cols = stripped.split("\t")
        already_reduced = len(cols) == Reduce_Cols and cols[1] in types
        if already_reduced:
            # Accumulate past data: forward the reduced row untouched.
            print(stripped)
            continue
        if not column.valid(cols):
            continue

        uid = column.uid(cols)
        uuid = column.uuidOnly(cols)
        ip = cols[column.IP_CID]
        time = cols[column.TIME_CID]
        channel = cols[column.MEDIA_CID]

        if uid is not None and len(uid) != 0:
            _register(Uid_Fix + uid, uuid, channel, ip, time)
        if uuid is not None and len(uuid) != 0:
            _register(Uuid_Fix + uuid, uid, channel, ip, time)

    for user in collected:
        output(user, collected[user])