def usermt2(urls01, urls11, Realua):
    """Select candidate "real" URLs among the requests of one user agent.

    Both mappings are merged (an entry of ``urls11`` wins over an entry of
    ``urls01`` with the same key).  Every value is UTF-8 encoded and parsed
    with ``rexurl``; a request is only considered when its request target
    is non-empty and its User-Agent equals ``Realua``.

    Args:
        urls01: mapping of record name -> raw request text.
        urls11: second mapping of the same shape.
        Realua: User-Agent value a request must carry to be considered.

    Returns:
        A duplicate-free list (arbitrary order) containing

        * every referer-less request target that is also a key of the
          referer counter and for which ``back`` is non-zero, and
        * every (stripped) referer that is itself the recorded referer of
          a request whose target is one of the referers kept by
          ``fitref``.
    """
    # ``dict(a.items() + b.items())`` only works on Python 2, where items()
    # returns lists; copy + update gives the same merged mapping everywhere.
    urls = dict(urls01)
    urls.update(urls11)

    refs = {}    # referer -> number of requests that carried it
    get01s = {}  # request target sent without a referer -> count
    getref = {}  # request target -> referer it was requested from
    for fname in urls:
        url = urls[fname].encode("utf-8")
        Get, Host, Ref, UAgent, Cookie = rexurl(url)
        if len(Get) == 0 or UAgent != Realua:
            continue
        if len(Ref) == 0:
            get01s[Get] = get01s.get(Get, 0) + 1
        else:
            getref[Get] = Ref
            refs[Ref] = refs.get(Ref, 0) + 1

    refss = fitref(refs)

    # Referer-less targets that some other request names as its referer.
    us = []
    for get0 in get01s:
        get0 = get0.strip()
        if get0 in refs and back(get0) != 0:
            us.append(get0)

    # Referers of the requests whose own target is a kept referer.  Only
    # membership is needed afterwards, hence a set.
    userurl1 = set()
    for ref in refss:
        ref = ref.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref != getr or back(getr) == 0:
                continue
            # NOTE(review): the source layout was flattened; the ``break``
            # is read as belonging to this membership test - confirm.
            if gref1 in userurl1:
                break
            userurl1.add(gref1)

    realurl = set(us)
    for url0 in refs:
        url0 = url0.strip()
        if url0 in userurl1:
            realurl.add(url0)
    return list(realurl)
def getu(get, refs, userurl1):
    """Return 0 when ``get`` passes every acceptance check, else ``None``.

    The checks run in this order and stop at the first failure: ``back``
    of the stripped value is non-zero; the stripped value is a key of
    ``refs`` with a count of at least 10; ``userurl1`` is non-empty and
    contains the stripped value.
    """
    candidate = get.strip()
    if back(candidate) == 0:
        return None
    if candidate not in refs.keys() or not (refs[candidate] >= 10):
        return None
    if len(userurl1) == 0 or candidate not in userurl1:
        return None
    return 0
def getu(get,refs,userurl1):
    """Tell whether the stripped ``get`` is an accepted, frequent referer.

    Returns 0 when all of the following hold (tested left to right with
    short-circuiting), and falls through to ``None`` otherwise:
    ``back(get) != 0``, ``get`` is a key of ``refs`` counted at least 10
    times, and the non-empty ``userurl1`` contains ``get``.
    """
    get = get.strip()
    accepted = (
        back(get) != 0
        and get in refs.keys()
        and refs[get] >= 10
        and len(userurl1) != 0
        and get in userurl1
    )
    if accepted:
        return 0
def fitref(refs):
    """Keep only the frequently seen referers that ``back`` accepts.

    Args:
        refs: mapping of referer -> number of times it was seen.

    Returns:
        A new dict holding the entries of ``refs`` whose count is at least
        10 and for which ``back(referer)`` is non-zero.  ``refs`` itself is
        left untouched.
    """
    frefs = {}
    # The loop variable used to be named ``re``, which hid the regex module
    # of the same name inside this function.
    for referer, numref in refs.items():
        # Count first: ``back`` is only consulted for frequent referers.
        if numref >= 10 and back(referer) != 0:
            frefs[referer] = numref
    return frefs
def fitref(refs):
    """Filter a referer counter down to frequent, accepted referers.

    Args:
        refs: mapping of referer -> occurrence count.

    Returns:
        A new dict with every ``referer: count`` pair of ``refs`` whose
        count is >= 10 and whose referer makes ``back`` return a non-zero
        value.  The input mapping is not modified.
    """
    # ``referer`` replaces the former loop name ``re`` so the regex module
    # is no longer shadowed here.  The count is tested first, so ``back``
    # is never called for rare referers.
    return {
        referer: count
        for referer, count in refs.items()
        if count >= 10 and back(referer) != 0
    }
def fback(results):
    """Split captured requests into "back" requests and ordinary ones.

    For every row the record name is taken from ``row[1]`` and the raw
    request text from ``row[2]`` (both UTF-8 encoded).  Tabs are removed
    and each newline becomes ``;;`` before the request target, Host,
    Referer and User-Agent are extracted.  Rows without a recognisable
    ``GET``/``POST`` request line are skipped entirely.

    Args:
        results: iterable of rows, indexable at positions 1 and 2.

    Returns:
        A tuple ``(backs, urls, h_totals, r_totals, u_totals, get)``:

        * ``backs``: name -> raw text of rows whose target makes ``back``
          return 0;
        * ``urls``: name -> raw text of all other processed rows;
        * ``h_totals`` / ``r_totals`` / ``u_totals``: occurrence counts of
          the Host, Referer and User-Agent values (a missing header is
          counted under the empty string);
        * ``get``: request targets in processing order.
    """
    # Compiled once instead of once per row.  The original patterns used
    # ``[POS|GE]T`` and ``[0|1]``: those are character classes, not
    # alternations, and the dot in ``1.`` matched any character.
    is_get = re.compile(r'(?:POST|GET)(.+?)HTTP/1\.[01];;')
    is_host = re.compile(r'Host:(.+?);;')
    is_referer = re.compile(r'Referer:(.+?);;')
    is_user = re.compile(r'User-Agent:(.+?);;')

    backs = {}
    urls = {}
    h_totals = {}
    r_totals = {}
    u_totals = {}
    get = []
    for row in results:
        fname = row[1].encode("utf-8")
        surl = row[2].encode("utf-8")
        url = surl.replace('\t', '').replace('\n', ';;')

        # [1:] drops the first captured character - presumably the blank
        # that follows the method name / the header colon.
        Get = "".join(is_get.findall(url))[1:]
        if len(Get) == 0:
            continue
        get.append(Get)

        Host = "".join(is_host.findall(url))[1:]
        h_totals[Host] = h_totals.get(Host, 0) + 1

        Ref = "".join(is_referer.findall(url))[1:]
        r_totals[Ref] = r_totals.get(Ref, 0) + 1

        UAgent = "".join(is_user.findall(url))[1:]
        u_totals[UAgent] = u_totals.get(UAgent, 0) + 1

        if back(Get) == 0:
            backs[fname] = surl
        else:
            urls[fname] = surl
    return backs, urls, h_totals, r_totals, u_totals, get
def get11ref(urls):
    """Collect the entries of ``urls`` whose key makes ``back`` return 0.

    Returns:
        ``(cheurl, b)`` where ``cheurl`` holds the selected ``key: value``
        pairs and ``b`` is how many entries were selected.
    """
    cheurl = {}
    for key, value in urls.items():
        if back(key) != 0:
            continue
        cheurl[key] = value
    # Keys of a dict are unique, so the number of selected entries is
    # simply the size of the result.
    return cheurl, len(cheurl)
def fback(results):
    """Split captured requests into "back" requests and ordinary ones.

    The record name is read from ``row[1]`` and the raw request text from
    ``row[2]`` (both UTF-8 encoded).  Tabs are removed and newlines become
    ``;;``; the request line, Host, Referer and User-Agent are then pulled
    out with regular expressions and the per-value totals are updated.

    Args:
        results: iterable of rows, indexable at positions 1 and 2.

    Returns:
        A tuple ``(backs, urls, h_totals, r_totals, u_totals)``: ``backs``
        maps name -> raw text for rows whose extracted request makes
        ``back`` return 0, ``urls`` holds the remaining rows, and the three
        ``*_totals`` dicts count Host, Referer and User-Agent values (a
        missing header is counted under the empty string).
    """
    # Compiled once instead of once per row.  The request pattern used to
    # be ``[POST|GET](.+?);`` - a character class that matches one single
    # letter, so it fired on any P/O/S/T/G/E (or '|') in the text and the
    # joined capture was garbage.  An alternation is what was meant.
    is_get = re.compile(r'(?:POST|GET)(.+?);')
    is_host = re.compile(r'Host:(.+?);')
    is_referer = re.compile(r'Referer:(.+?);')
    # The User-Agent capture runs up to a ')' followed by ';', so the ';'
    # separators inside a parenthesised UA comment do not cut it short.
    is_user = re.compile(r'User-Agent:(.+?)\);')

    backs = {}
    urls = {}
    h_totals = {}
    r_totals = {}
    u_totals = {}
    for row in results:
        fname = row[1].encode("utf-8")
        surl = row[2].encode("utf-8")
        url = surl.replace('\t', '').replace('\n', ';;')

        Get = "".join(is_get.findall(url))

        Host = "".join(is_host.findall(url))
        h_totals[Host] = h_totals.get(Host, 0) + 1

        Ref = "".join(is_referer.findall(url))
        r_totals[Ref] = r_totals.get(Ref, 0) + 1

        UAgent = "".join(is_user.findall(url))
        u_totals[UAgent] = u_totals.get(UAgent, 0) + 1

        if back(Get) == 0:
            backs[fname] = surl
        else:
            urls[fname] = surl
    return backs, urls, h_totals, r_totals, u_totals
def main():
    """Command-line entry point; dispatches on the last argument.

    * ``--no-file`` / ``-n``: interactive mode (``noFile``).
    * ``-h`` / ``--help`` or no argument at all: ``help``.
    * ``--view-overhead``: ``overhead``.
    * any other value starting with ``-``: reports an invalid flag.
    * a path ending in ``.txt``: the file is read; if it contains '●' or
      '○' it is passed to ``back.back``, otherwise to ``clack.clack``, and
      the outcome is written to ``result.txt`` in the working directory.
    * anything else: reports an invalid file type.
    """
    filename = sys.argv[-1]  # only the last argument is inspected

    if filename == '--no-file' or filename == "-n":
        noFile()
        return
    # Without arguments argv[-1] is the script path itself, whatever the
    # script is called or however it was invoked (not just 'main.py').
    if len(sys.argv) < 2 or filename in ('-h', '--help', 'main.py'):
        help()
        return
    if filename == '--view-overhead':
        overhead()
        return

    # startswith() is safe for an empty argument; filename[0] was not.
    if filename.startswith('-'):
        print('Invalid flag.\nRun with -h for help.')
        return
    if filename[-4:] != '.txt':
        print('Invalid filetype.\nRun with -h for help.')
        return

    with open(filename) as givenFile:
        translate = givenFile.read()
    print(type(translate))  # diagnostic output kept from the original

    if '●' in translate or '○' in translate:  # input is already a clack
        result = back.back(translate)
    else:
        result = clack.clack(translate, False)

    # The text is converted before result.txt is opened, so a failing
    # conversion no longer leaves a truncated file behind, and ``with``
    # closes the file on every path (one branch used to say
    # ``encoded.close`` without calling it).
    with open("result.txt", "w+") as resultFile:
        resultFile.write(result)
def noFile():
    """Interactive mode: read a message from stdin and print its translation.

    Lines are read until one equals ``done``; they are concatenated without
    any separator.  Text containing '●' or '○' is handed to ``back.back``,
    any other non-empty text to ``clack.clack``; empty input prints nothing
    further.
    """
    # Every branch of the original selection loop cleared its flag, so the
    # prompt is shown exactly once.
    print("Type 'done' on a new line when finished typing message\n")

    pieces = []
    line = input()
    while line != 'done':
        pieces.append(line)
        line = input()
    translate = ''.join(pieces)

    if '●' in translate or '○' in translate:
        print('\nResults:\n' + '"' + back.back(translate) + '"')
    elif translate != '':
        print('\nResult:' + clack.clack(translate, False))
def __init__(self):
    """Create the instance and store the result of ``back.back()`` on it."""
    # ``back.back`` is called without arguments; what it returns is not
    # visible from here (presumably a helper object - confirm).
    self.b = back.back()
def ReURL(ua, results):
    """Select candidate "real" URLs among the requests sent by ``ua``.

    Each row supplies a record name in ``row[0]`` and the raw request text
    in ``row[1]``.  Tabs are removed and newlines become ``;;`` before the
    text is parsed with ``rexurlg``.  Only requests whose User-Agent equals
    ``ua`` and whose request target is non-empty are considered; a row that
    raises while being processed is skipped.

    Args:
        ua: User-Agent value a request must carry to be considered.
        results: iterable of rows, indexable at positions 0 and 1.

    Returns:
        A duplicate-free list (arbitrary order) made of

        * referer-less request targets that also occur as a referer,
        * referers kept by ``fitref`` (and those targets) that are
          themselves the recorded referer of a request for a kept referer,
        * kept-referer targets reached from such a URL, when they were not
          selected already and ``back`` is non-zero for them.
    """
    refs = {}    # referer -> number of counted requests that carried it
    get01s = {}  # request target sent without a referer -> count
    getref = {}  # request target -> referer it was requested from
    for row in results:
        # ``except Exception`` keeps the best-effort skipping of broken
        # rows but no longer swallows KeyboardInterrupt / SystemExit the
        # way the bare ``except:`` did.
        try:
            # The record name is not used further, but a row whose name
            # cannot be encoded is still skipped, as before.
            row[0].encode("utf-8")
            url = row[1].encode("utf-8")
            surl = url.replace('\t', '').replace('\n', ';;')
            Get, Host, Ref, UAgent, Cookie = rexurlg(surl)
            if UAgent != ua or len(Get) == 0:
                continue
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
                continue
            Ref = Ref.strip()
            # Requests rejected by ``http`` are not counted at all.
            if http(Get) != 0:
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            continue

    # Referer-less targets that some counted request names as its referer.
    us = []
    for get0 in get01s:
        get0 = get0.strip()
        if get0 in refs:
            us.append(get0)

    refss = fitref(refs)

    userurl1 = set()  # referers of requests whose target is a kept referer
    userurl1GR = {}   # such a referer -> the kept referer reached from it
    for ref in refss:
        ref = ref.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref != getr:
                continue
            userurl1GR[gref1] = getr
            # NOTE(review): the source layout was flattened; the ``break``
            # is read as belonging to this membership test - confirm.
            if gref1 in userurl1:
                break
            userurl1.add(gref1)

    realurl = set()
    for url0 in us + list(refss):
        url0 = url0.strip()
        if url0 in userurl1:
            realurl.add(url0)

    # Second hop: targets reached from an already selected URL.  The tests
    # deliberately look at the first-phase selection only.
    URL = []
    for urlgref, rget in userurl1GR.items():
        if urlgref in realurl and rget not in realurl:
            if back(rget) != 0:
                URL.append(rget)

    realurl.update(URL)
    realurl.update(us)
    return list(realurl)
# Fragment of a larger routine whose header is not visible here: ``url``,
# ``fname`` and the counters ``aa``, ``bb`` and ``b2`` come from the
# surrounding code.  It extracts the GET line, Host, Referer and User-Agent
# from ``url`` and updates the counters according to ``back`` and ``soft``.
# NOTE(review): the original indentation was lost; the nesting below is the
# most plausible reading - confirm against the real file.
isGet=re.compile(r'GET(.+?);')
Get=re.findall(isGet,url)
# All captures are concatenated into one string.
Get="".join(Get)
isHost=re.compile(r'Host:(.+?);')
Host=re.findall(isHost,url)
isReferer=re.compile(r'Referer:(.+?);')
Referer=re.findall(isReferer,url)
# The User-Agent capture ends at a ')' that is followed by ';'.
isUser=re.compile(r'User-Agent:(.+?)\);')
UserAgent=re.findall(isUser,url)
a=back(Get)
if a==0:
    # back() returned 0: counted in bb.  The database writes that used to
    # follow are disabled:
    #cursor.execute("insert into backurl (fname,url) values (%s,%s)",[fname,Get])
    #cursor.execute("delete from gethttp where fname='%s'" %fname)
    #conn.commit()
    bb=bb+1
else:
    aa=aa+1
    b=soft(Get)
    if b==0:
        # soft() returned 0: counted in b2 and reported.
        b2=b2+1
        print fname,Get
def usermt1(urls01, urls11, Realua):
    """Select candidate "real" URLs among the ASCII requests of one agent.

    Both mappings are merged (``urls11`` wins on duplicate keys).  A record
    is only parsed with ``rexurl`` when chardet reports its encoding as
    ``ascii``; it is only considered when its request target is non-empty
    and its User-Agent equals ``Realua``.  Records that raise while being
    parsed are skipped.

    Args:
        urls01: mapping of record name -> raw request text.
        urls11: second mapping of the same shape.
        Realua: User-Agent value a request must carry to be considered.

    Returns:
        A duplicate-free list (arbitrary order) of the referer-less request
        targets that also occur as a referer and pass ``back``, plus the
        referers that are themselves the recorded referer of a request for
        a referer kept by ``fitref``.  The size of the list is printed.
    """
    # ``dict(a.items() + b.items())`` only works on Python 2; copy + update
    # builds the same merged mapping on every version.
    urls = dict(urls01)
    urls.update(urls11)

    refs = {}    # referer -> number of requests that carried it
    get01s = {}  # request target sent without a referer -> count
    getref = {}  # request target -> referer it was requested from
    for fname in urls:
        urls1 = urls[fname]
        encoding = chardet.detect(urls1)['encoding']
        # chardet reports None when it cannot guess; calling .strip() on it
        # used to abort the whole run.  Such records are now skipped like
        # any other non-ASCII record.
        if encoding is None or encoding.strip() != "ascii":
            continue
        # ``except Exception`` keeps the best-effort skipping without
        # swallowing KeyboardInterrupt / SystemExit like a bare except.
        try:
            Get, Host, Ref, UAgent, Cookie = rexurl(urls1)
            if len(Get) == 0 or UAgent != Realua:
                continue
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
            else:
                Ref = Ref.strip()
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            continue

    # Referer-less targets that some other request names as its referer.
    us = []
    for get0 in get01s:
        get0 = get0.strip()
        if get0 in refs and back(get0) != 0:
            us.append(get0)

    refss = fitref(refs)

    userurl1 = set()  # referers of requests whose target is a kept referer
    for ref in refss:
        ref = ref.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref != getr or back(getr) == 0:
                continue
            # NOTE(review): the source layout was flattened; the ``break``
            # is read as belonging to this membership test - confirm.
            if gref1 in userurl1:
                break
            userurl1.add(gref1)

    realurl = set(us)
    for url0 in refs:
        url0 = url0.strip()
        if url0 in userurl1:
            realurl.add(url0)
    realurls = list(realurl)
    print('first judge realurls: %d' % len(realurls))
    return realurls
def maxtime2(gettimes, realurl):
    """Find the most recent timestamp among the accepted URLs.

    An entry of ``gettimes`` qualifies when ``back`` of its stripped key is
    non-zero and the stripped key is contained in ``realurl``.

    Args:
        gettimes: mapping of URL -> timestamp string in the format
            ``%Y/%m/%d %H:%M:%S``.
        realurl: collection of accepted URLs.

    Returns:
        ``{latest_timestamp: url}`` for the qualifying entry with the
        latest timestamp (the first one seen wins a tie), or an empty dict
        when no entry qualifies.

    Raises:
        ValueError: when a qualifying timestamp does not match the format.
    """
    time_format = "%Y/%m/%d %H:%M:%S"
    urltime = {}     # timestamp -> URL it belongs to
    maxtimes = None  # latest timestamp seen so far, as text
    latest = None    # the same timestamp, parsed
    for ti in gettimes:
        ftime = gettimes[ti].strip()
        tis = ti.strip()
        if back(tis) == 0 or tis not in realurl:
            continue
        urltime[ftime] = tis
        current = datetime.datetime.strptime(ftime, time_format)
        # The original returned as soon as len(realurl) entries had been
        # seen - before comparing the last one - started from a hard-coded
        # 2012 date that could be used as a missing key, and fell through
        # to None when fewer entries qualified.  Every qualifying entry is
        # compared now and the result is built after the loop.
        if latest is None or latest < current:
            latest = current
            maxtimes = ftime
            print(maxtimes)  # progress output kept from the original
    if maxtimes is None:
        return {}
    return {maxtimes: urltime[maxtimes]}
# -*- coding: UTF-8 -*- #Auther Li Mengxing #descrpition:程序主入口 import time from transitions import Machine from surround import surround from back import back from land import land from launch import launch #from surround import transitions, states # model = surround() # model2 = back() #分别创建四个实例 surrounder = surround() backer = back() lander = land() launcher = launch() #状态定义 status = ['READY','WAITING','WORKING','DONE'] #设置状态转移 transitions = [ {'trigger': 'initialed', 'source': 'READY', 'dest': 'WAITING' }, {'trigger': 'wating_over', 'source': 'WAITING', 'dest': 'WORKING' }, {'trigger': 'work_done', 'source': 'WORKING', 'dest': 'DONE' }, {'trigger': 'ready','source':'DONE','dest':'READY'} ] #分别设置各自的状态机
def usermt(urls01, urls11, Realua):
    """Select candidate "real" URLs among the requests of one user agent.

    Both mappings are merged (``urls11`` wins on duplicate keys) and every
    value is parsed with ``rexurl``.  A request is only considered when its
    request target is non-empty and its User-Agent equals ``Realua``.  When
    a record raises while being parsed, its name is printed and the record
    is skipped.

    Args:
        urls01: mapping of record name -> raw request text.
        urls11: second mapping of the same shape.
        Realua: User-Agent value a request must carry to be considered.

    Returns:
        A duplicate-free list (arbitrary order) of the referer-less request
        targets that also occur as a referer and pass ``back``, plus the
        referers that are themselves the recorded referer of a request for
        a referer kept by ``fitref``.  Two progress counts are printed.
    """
    # ``dict(a.items() + b.items())`` only works on Python 2; copy + update
    # builds the same merged mapping on every version.
    urls = dict(urls01)
    urls.update(urls11)

    refs = {}    # referer -> number of requests that carried it
    get01s = {}  # request target sent without a referer -> count
    getref = {}  # request target -> referer it was requested from
    for fname in urls:
        urls1 = urls[fname]
        # The original ran chardet on every record and called .strip() on
        # the reported encoding without ever using the result; chardet
        # reports None when it cannot guess, which aborted the whole run.
        # The unused detection is gone.
        try:
            Get, Host, Ref, UAgent, Cookie = rexurl(urls1)
            if len(Get) == 0 or UAgent != Realua:
                continue
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
            else:
                Ref = Ref.strip()
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            # Narrower than the former bare ``except:`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            print(fname)
            continue

    # Referer-less targets that some other request names as its referer.
    us = []
    for get0 in get01s:
        get0 = get0.strip()
        if get0 in refs and back(get0) != 0:
            us.append(get0)
    print("len(us): %d" % len(us))

    refss = fitref(refs)

    userurl1 = set()  # referers of requests whose target is a kept referer
    for ref in refss:
        ref = ref.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref != getr or back(getr) == 0:
                continue
            # NOTE(review): the source layout was flattened; the ``break``
            # is read as belonging to this membership test - confirm.
            if gref1 in userurl1:
                break
            userurl1.add(gref1)

    realurl = set(us)
    for url0 in refs:
        url0 = url0.strip()
        if url0 in userurl1:
            realurl.add(url0)
    realurls = list(realurl)
    print('kkk: %d' % len(realurls))
    return realurls
def usermt11(urls01, urls11, Realua):
    """Select candidate "real" URLs and remember when each was requested.

    Both mappings are merged (``urls11`` wins on duplicate keys).  The part
    of a record key before the first ``|`` is used as the record's time
    value.  A record is only parsed with ``rexurl`` when chardet reports
    its encoding as ``ascii``, and only considered when its request target
    is non-empty and its User-Agent equals ``Realua``.  A request for a
    target that was already seen is ignored unless ``Caltime(previous,
    current)`` exceeds 120; otherwise the stored time is replaced.

    Args:
        urls01: mapping of ``"<time>|..."`` record key -> raw request text.
        urls11: second mapping of the same shape.
        Realua: User-Agent value a request must carry to be considered.

    Returns:
        ``(realurls, gettimes)``: the duplicate-free list of selected URLs
        (arbitrary order) and the mapping request target -> time value of
        the request that was last counted for it.
    """
    # ``dict(a.items() + b.items())`` only works on Python 2; copy + update
    # builds the same merged mapping on every version.
    urls = dict(urls01)
    urls.update(urls11)

    refs = {}      # referer -> number of requests that carried it
    get01s = {}    # request target sent without a referer -> count
    getref = {}    # request target -> referer it was requested from
    gettimes = {}  # request target -> time value of its counted request
    for fnames in urls:
        fname = fnames.split("|")[0].strip()
        urls1 = urls[fnames]
        encoding = chardet.detect(urls1)['encoding']
        # chardet reports None when it cannot guess; calling .strip() on it
        # used to abort the whole run.  Such records are now skipped like
        # any other non-ASCII record.
        if encoding is None or encoding.strip() != "ascii":
            continue
        # ``except Exception`` keeps the best-effort skipping without
        # swallowing KeyboardInterrupt / SystemExit like a bare except.
        try:
            Get, Host, Ref, UAgent, Cookie = rexurl(urls1)
            if len(Get) == 0 or UAgent != Realua:
                continue
            # NOTE(review): the source layout was flattened; the skip is
            # read as applying to repeats within the 120 window - confirm.
            if Get in gettimes and not Caltime(gettimes[Get], fname) > 120:
                continue
            gettimes[Get] = fname
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
            else:
                Ref = Ref.strip()
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            continue

    # Referer-less targets that some other request names as its referer.
    us = []
    for get0s in get01s:
        get0 = get0s.strip()
        if get0 in refs and back(get0) != 0:
            us.append(get0)

    refss = fitref(refs)

    userurl1 = set()  # referers of requests whose target is a kept referer
    for ref1 in refss:
        ref = ref1.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref != getr or back(getr) == 0:
                continue
            # NOTE(review): the ``break`` is read as belonging to this
            # membership test - confirm against the real file.
            if gref1 in userurl1:
                break
            userurl1.add(gref1)

    realurl = set(us)
    for url0 in refs:
        url0 = url0.strip()
        if url0 in userurl1:
            realurl.add(url0)
    realurls = list(realurl)
    print('kkk: %d' % len(realurls))
    return realurls, gettimes
def usermt11(urls01, urls11, Realua):
    """Pick the likely "real" URLs of one agent and their request times.

    The two mappings are merged, with ``urls11`` taking precedence on equal
    keys.  The text before the first ``|`` of a record key serves as the
    record's time value.  Only records that chardet classifies as ``ascii``
    are parsed with ``rexurl``; of those, only requests with a non-empty
    request target and a User-Agent equal to ``Realua`` count.  A repeated
    request for a known target is dropped unless ``Caltime(previous,
    current)`` is greater than 120, in which case it replaces the stored
    time and is counted again.

    Args:
        urls01: mapping of ``"<time>|..."`` record key -> raw request text.
        urls11: second mapping of the same shape.
        Realua: User-Agent value a request must carry to be considered.

    Returns:
        A pair ``(realurls, gettimes)``: the selected URLs as a list
        without duplicates (arbitrary order), and a dict mapping each
        counted request target to the time value stored for it.
    """
    # Portable replacement for the Python-2-only
    # ``dict(urls01.items() + urls11.items())``.
    urls = dict(urls01)
    urls.update(urls11)

    refs = {}      # referer -> how many requests carried it
    get01s = {}    # target requested without a referer -> count
    getref = {}    # target -> referer it was requested from
    gettimes = {}  # target -> time value of its counted request
    for fnames in urls:
        fname = fnames.split("|")[0].strip()
        urls1 = urls[fnames]
        detected = chardet.detect(urls1)
        typ = detected['encoding']
        # ``encoding`` is None when chardet cannot guess; the former
        # unconditional ``typ.strip()`` then raised outside the try block
        # and stopped the run.  Treat it like any other non-ASCII record.
        if typ is None:
            continue
        if typ.strip() != "ascii":
            continue
        try:
            Get, Host, Ref, UAgent, Cookie = rexurl(urls1)
            if len(Get) == 0 or UAgent != Realua:
                continue
            if Get in gettimes:
                # NOTE(review): indentation was lost in the source; the
                # skip is read as "repeat within the 120 window" - confirm.
                if not Caltime(gettimes[Get], fname) > 120:
                    continue
            gettimes[Get] = fname
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
            else:
                Ref = Ref.strip()
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            # Was a bare ``except:``, which also hid KeyboardInterrupt and
            # SystemExit; broken records are still skipped silently.
            continue

    # Targets requested without a referer that are themselves a referer.
    us = [
        get0
        for get0 in (get0s.strip() for get0s in get01s)
        if get0 in refs and back(get0) != 0
    ]

    refss = fitref(refs)

    userurl1 = set()  # referers of requests whose target is a kept referer
    for ref1 in refss:
        ref = ref1.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref == getr and back(getr) != 0:
                # NOTE(review): the ``break`` is read as the else-branch of
                # this membership test - confirm against the real file.
                if gref1 not in userurl1:
                    userurl1.add(gref1)
                else:
                    break

    realurl = set(us)
    realurl.update(
        url0 for url0 in (key.strip() for key in refs) if url0 in userurl1
    )
    realurls = list(realurl)
    print('kkk: %d' % len(realurls))
    return realurls, gettimes
def maxtime2(gettimes, realurl):
    """Return the accepted URL with the latest request timestamp.

    Only entries of ``gettimes`` whose stripped key makes ``back`` return a
    non-zero value and is contained in ``realurl`` take part.

    Args:
        gettimes: mapping of URL -> timestamp string formatted as
            ``%Y/%m/%d %H:%M:%S``.
        realurl: collection of accepted URLs.

    Returns:
        A dict with the single pair ``latest_timestamp: url`` (on equal
        timestamps the first one encountered is kept), or an empty dict if
        no entry takes part.

    Raises:
        ValueError: if a participating timestamp does not match the format.
    """
    parse = datetime.datetime.strptime
    time_format = "%Y/%m/%d %H:%M:%S"

    urltime = {}  # timestamp text -> URL
    best = None   # (parsed timestamp, timestamp text) of the current maximum
    for ti, raw_time in gettimes.items():
        ftime = raw_time.strip()
        tis = ti.strip()
        if back(tis) != 0 and tis in realurl:
            urltime[ftime] = tis
            moment = parse(ftime, time_format)
            # Previously the function returned once ``len(realurl)`` entries
            # had been counted, without comparing the last one; it looked
            # up a hard-coded 2012 start value that was not necessarily a
            # key, and returned None if the count was never reached.  All
            # participating entries are compared before answering now.
            if best is None or best[0] < moment:
                best = (moment, ftime)
                print(ftime)  # progress output kept from the original
    maxT = {}
    if best is not None:
        maxT[best[1]] = urltime[best[1]]
    return maxT
# Fragment of a larger routine whose header is not visible here: ``url``,
# ``fname`` and the counters ``aa``, ``bb`` and ``b2`` are provided by the
# surrounding code.  The GET line, Host, Referer and User-Agent are pulled
# out of ``url`` and the counters are updated from ``back`` and ``soft``.
# NOTE(review): the original indentation was lost; the nesting below is the
# most plausible reading - confirm against the real file.
isGet = re.compile(r'GET(.+?);')
Get = re.findall(isGet, url)
# All captures are concatenated into one string.
Get = "".join(Get)
isHost = re.compile(r'Host:(.+?);')
Host = re.findall(isHost, url)
isReferer = re.compile(r'Referer:(.+?);')
Referer = re.findall(isReferer, url)
# The User-Agent capture ends at a ')' that is followed by ';'.
isUser = re.compile(r'User-Agent:(.+?)\);')
UserAgent = re.findall(isUser, url)
a = back(Get)
if a == 0:
    # back() returned 0: counted in bb.  The database writes that used to
    # follow are disabled:
    #cursor.execute("insert into backurl (fname,url) values (%s,%s)",[fname,Get])
    #cursor.execute("delete from gethttp where fname='%s'" %fname)
    #conn.commit()
    bb = bb + 1
else:
    aa = aa + 1
    b = soft(Get)
    if b == 0:
        # soft() returned 0: counted in b2 and reported.
        b2 = b2 + 1
        print fname, Get
def ReURL(ua, results):
    """Pick the likely "real" URLs among the requests made by ``ua``.

    ``row[0]`` of each row is the record name and ``row[1]`` the raw
    request text; tabs are dropped and newlines replaced by ``;;`` before
    ``rexurlg`` parses the text.  Requests count only if their User-Agent
    equals ``ua`` and their request target is non-empty.  Rows that raise
    while being processed are skipped.

    Args:
        ua: User-Agent value a request must carry to be considered.
        results: iterable of rows, indexable at positions 0 and 1.

    Returns:
        A list without duplicates (arbitrary order) consisting of

        * request targets sent without a referer that also occur as a
          referer,
        * those targets and the referers kept by ``fitref`` that are the
          recorded referer of a request for a kept referer,
        * kept-referer targets reached from such a URL, if not selected
          already and ``back`` is non-zero for them.
    """
    refs = {}    # referer -> number of counted requests carrying it
    get01s = {}  # target requested without a referer -> count
    getref = {}  # target -> referer it was requested from
    for row in results:
        try:
            # The name itself is unused; encoding it keeps the old effect
            # that a row with an unusable name is skipped.
            row[0].encode("utf-8")
            surl = row[1].encode("utf-8").replace('\t', '').replace('\n', ';;')
            Get, Host, Ref, UAgent, Cookie = rexurlg(surl)
            if UAgent == ua and len(Get) != 0:
                if len(Ref) == 0:
                    get01s[Get] = get01s.get(Get, 0) + 1
                else:
                    Ref = Ref.strip()
                    # Only requests accepted by ``http`` are counted.
                    if http(Get) != 0:
                        getref[Get] = Ref
                        refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            # Formerly a bare ``except:``, which also swallowed
            # KeyboardInterrupt and SystemExit.
            continue

    # Targets requested without a referer that are themselves a referer.
    us = [get0 for get0 in (key.strip() for key in get01s) if get0 in refs]

    refss = fitref(refs)

    userurl1 = set()  # referers of requests whose target is a kept referer
    userurl1GR = {}   # such a referer -> the kept referer reached from it
    for ref in refss:
        ref = ref.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref == getr:
                userurl1GR[gref1] = getr
                # NOTE(review): indentation was lost in the source; the
                # ``break`` is read as the else-branch of this membership
                # test - confirm against the real file.
                if gref1 not in userurl1:
                    userurl1.add(gref1)
                else:
                    break

    first_pass = set()
    for url0 in us + list(refss):
        url0 = url0.strip()
        if url0 in userurl1:
            first_pass.add(url0)

    # Second hop, judged against the first-pass selection only.
    URL = [
        rget
        for urlgref, rget in userurl1GR.items()
        if urlgref in first_pass
        and rget not in first_pass
        and back(rget) != 0
    ]

    return list(first_pass.union(URL, us))