Esempio n. 1
0
def usermt2(urls01,urls11,Realua):
    """Pick the URLs requested by ``Realua`` that look like real page visits.

    ``urls01`` and ``urls11`` map a record name to raw request text; on a
    key clash the entry from ``urls11`` wins.  Each request is parsed with
    ``rexurl`` and only requests with a non-empty ``Get`` whose user agent
    equals ``Realua`` are considered.

    Returns a list of unique URLs (order unspecified).
    """
    merged = dict(urls01.items() + urls11.items())

    direct_hits = {}    # Get -> count of requests carrying no referer
    referer_hits = {}   # Ref -> count of requests naming it as referer
    referer_of = {}     # Get -> referer of the last request for that Get
    for key in merged:
        request = merged[key].encode("utf-8")
        Get, Host, Ref, UAgent, Cookie = rexurl(request)
        if len(Get) == 0 or UAgent != Realua:
            continue
        if len(Ref) == 0:
            direct_hits[Get] = direct_hits.get(Get, 0) + 1
            continue
        referer_of[Get] = Ref
        referer_hits[Ref] = referer_hits.get(Ref, 0) + 1

    frequent_refs = fitref(referer_hits)

    # Referer-less requests that are also named as a referer elsewhere.
    candidates = []
    for raw in direct_hits.keys():
        stripped = raw.strip()
        if stripped in referer_hits.keys() and back(stripped) != 0:
            candidates.append(stripped)
    direct_confirmed = candidates[:]

    # For every frequent referer that was itself requested, remember the
    # referer of that request.
    parents = []
    for ref in frequent_refs.keys():
        ref = ref.strip()
        for requested in referer_of.keys():
            parent = referer_of[requested]
            if ref != requested.strip() or back(requested.strip()) == 0:
                continue
            if parent in parents:
                # An already collected parent ends the scan for this ref.
                break
            parents.append(parent)

    candidates.extend(referer_hits.keys())
    realurl = [item.strip() for item in candidates if item.strip() in parents]
    realurl.extend(direct_confirmed)
    return list(set(realurl))
Esempio n. 2
0
def getu(get, refs, userurl1):
    """Return 0 when ``get`` is a frequent referer already in ``userurl1``.

    The stripped ``get`` must satisfy ``back(get) != 0``, be a key of
    ``refs`` with a count of at least 10, and be contained in the
    (non-empty) ``userurl1``.  In every other case ``None`` is returned.
    """
    candidate = get.strip()
    if back(candidate) == 0:
        return None
    if candidate not in refs.keys() or not refs[candidate] >= 10:
        return None
    if len(userurl1) == 0 or candidate not in userurl1:
        return None
    return 0
Esempio n. 3
0
def getu(get,refs,userurl1):
    """Tell whether the stripped ``get`` is a known, frequent referer.

    Returns 0 if ``back(get) != 0``, ``refs[get] >= 10`` and ``get`` is
    listed in a non-empty ``userurl1``; returns ``None`` otherwise.
    """
    key = get.strip()
    frequent = back(key) != 0 and key in refs.keys() and refs[key] >= 10
    if frequent and len(userurl1) != 0 and key in userurl1:
        return 0
    return None
Esempio n. 4
0
def fitref(refs):
    """Keep the referers seen at least 10 times for which ``back`` is non-zero.

    ``refs`` maps a referer to its hit count; the result is a new dict with
    the surviving ``referer -> count`` pairs.  ``back`` is only consulted
    for referers that already pass the count threshold.
    """
    return {
        referer: refs[referer]
        for referer in refs.keys()
        if refs[referer] >= 10 and back(referer) != 0
    }
Esempio n. 5
0
def fitref(refs):
    """Filter a ``referer -> count`` dict down to the frequent referers.

    An entry survives when its count is at least 10 and ``back(referer)``
    is non-zero.  Returns a new dict; ``refs`` is left untouched.
    """
    kept = {}
    for referer, hits in refs.items():
        if hits < 10:
            continue
        if back(referer) != 0:
            kept[referer] = hits
    return kept
Esempio n. 6
0
def fback(results):
    """Split captured HTTP requests into "back" and regular groups.

    Each row supplies a record name in ``row[1]`` and the raw request text
    in ``row[2]``.  Tabs are removed and newlines replaced by ``;;`` before
    the request line and the Host / Referer / User-Agent headers are
    extracted.  Rows without a recognisable GET/POST request line are
    skipped entirely.

    Returns a tuple ``(backs, urls, h_totals, r_totals, u_totals, get)``:
    ``backs`` / ``urls`` map record name to raw request text for rows where
    ``back(Get)`` is 0 / non-zero, the ``*_totals`` dicts count header
    values, and ``get`` lists every extracted request target in row order.
    """
    # Compiled once instead of once per row.  The request-line pattern uses
    # a real alternation: the former ``[POS|GE]T`` was a character class
    # matching any single one of ``P O S | G E`` followed by ``T``, and the
    # version suffix had an unescaped dot and a ``|`` inside its class.
    is_get = re.compile(r'(?:POS|GE)T(.+?)HTTP/1\.[01];;')
    header_patterns = (
        re.compile(r'Host:(.+?);;'),
        re.compile(r'Referer:(.+?);;'),
        re.compile(r'User-Agent:(.+?);;'),
    )

    backs = {}
    urls = {}
    h_totals = {}
    r_totals = {}
    u_totals = {}
    header_totals = (h_totals, r_totals, u_totals)
    get = []
    for row in results:
        fname = row[1].encode("utf-8")
        surl = row[2].encode("utf-8")
        url = surl.replace('\t', '').replace('\n', ';;')

        # [1:] drops the separator character that follows the method name.
        Get = "".join(is_get.findall(url))[1:]
        if len(Get) == 0:
            continue
        get.append(Get)

        for pattern, totals in zip(header_patterns, header_totals):
            # [1:] drops the character right after the header's colon.
            value = "".join(pattern.findall(url))[1:]
            totals[value] = totals.get(value, 0) + 1

        if back(Get) == 0:
            backs[fname] = surl
        else:
            urls[fname] = surl
    return backs, urls, h_totals, r_totals, u_totals, get
Esempio n. 7
0
def get11ref(urls):
    """Select the entries of ``urls`` whose key makes ``back`` return 0.

    Returns ``(cheurl, b)`` where ``cheurl`` holds the selected
    ``key -> value`` pairs and ``b`` is how many were selected.
    """
    cheurl = {}
    for key in urls.keys():
        if back(key) != 0:
            continue
        cheurl[key] = urls[key]
    return cheurl, len(cheurl)
Esempio n. 8
0
def fback(results):
    """Split captured HTTP requests into "back" and regular groups.

    Each row supplies a record name in ``row[1]`` and the raw request text
    in ``row[2]``.  Tabs are removed and newlines replaced by ``;;`` before
    the request line and the Host / Referer / User-Agent headers are
    extracted.

    Returns a tuple ``(backs, urls, h_totals, r_totals, u_totals)``:
    ``backs`` / ``urls`` map record name to raw request text for rows where
    ``back(Get)`` is 0 / non-zero, and the ``*_totals`` dicts count how
    often each header value occurred.
    """
    # Compiled once instead of once per row.  ``(?:POST|GET)`` replaces the
    # former ``[POST|GET]``, which was a character class matching a single
    # letter and therefore captured fragments from all over the request.
    is_get = re.compile(r'(?:POST|GET)(.+?);')
    header_patterns = (
        re.compile(r'Host:(.+?);'),
        re.compile(r'Referer:(.+?);'),
        # Only matches user agents whose value ends in ")".
        re.compile(r'User-Agent:(.+?)\);'),
    )

    backs = {}
    urls = {}
    h_totals = {}
    r_totals = {}
    u_totals = {}
    header_totals = (h_totals, r_totals, u_totals)

    for row in results:
        fname = row[1].encode("utf-8")
        surl = row[2].encode("utf-8")
        url = surl.replace('\t', '').replace('\n', ';;')

        Get = "".join(is_get.findall(url))

        for pattern, totals in zip(header_patterns, header_totals):
            value = "".join(pattern.findall(url))
            totals[value] = totals.get(value, 0) + 1

        if back(Get) == 0:
            backs[fname] = surl
        else:
            urls[fname] = surl
    return backs, urls, h_totals, r_totals, u_totals
Esempio n. 9
0
def main():
    """Dispatch on the last command-line argument.

    * ``--no-file`` / ``-n``: interactive mode (``noFile``).
    * ``-h`` / ``--help`` / ``main.py`` (i.e. no argument given): ``help``.
    * ``--view-overhead``: ``overhead``.
    * any other ``-flag``: error message.
    * a ``.txt`` path: the file is read, translated and the result written
      to ``result.txt``.  Text containing ``●`` or ``○`` is passed to
      ``back.back``; anything else to ``clack.clack``.
    """
    filename = sys.argv[-1]  # just take the last argument

    # this is not the right way to do flags
    if filename == '--no-file' or filename == "-n":
        noFile()
        return
    if filename == '-h' or filename == '--help' or filename == 'main.py':
        help()
        return
    if filename == '--view-overhead':
        overhead()
        return

    # startswith/endswith also cope with an empty argument, where the
    # former ``filename[0]`` raised IndexError.
    if filename.startswith('-'):
        print('Invalid flag.\nRun with -h for help.')
        return
    if not filename.endswith('.txt'):
        print('Invalid filetype.\nRun with -h for help.')
        return

    with open(filename) as givenFile:
        translate = givenFile.read()
    print(type(translate))

    # Translate before opening the output so that a failing translation
    # does not leave a truncated result.txt behind.
    if translate.find('●') != -1 or translate.find('○') != -1:  # if a clack
        result = back.back(translate)
    else:  # if not a clack
        result = clack.clack(translate, False)

    # ``with`` guarantees the close; the old code wrote ``encoded.close``
    # without parentheses and so never closed the file explicitly.
    with open("result.txt", "w+") as output:
        output.write(result)
Esempio n. 10
0
def noFile():
    """Read a message from stdin, translate it and print the result.

    Lines are read until one equals ``done`` and are concatenated without
    any separator.  A message containing ``●`` or ``○`` is passed to
    ``back.back``; any other non-empty message to ``clack.clack``.  An
    empty message prints nothing.
    """
    print("Type 'done' on a new line when finished typing message\n")

    # iter() keeps calling input() until it returns the sentinel 'done'.
    pieces = []
    for line in iter(input, 'done'):
        pieces.append(line)
    translate = ''.join(pieces)

    if '●' in translate or '○' in translate:
        print('\nResults:\n' + '"' + back.back(translate) + '"')
    elif translate != '':
        print('\nResult:' + clack.clack(translate, False))
Esempio n. 11
0
 def __init__(self):
     """Store the object returned by ``back.back()`` on ``self.b``."""
     self.b = back.back()
Esempio n. 12
0
def ReURL(ua,results):
    """Pick the URLs requested by user agent ``ua`` that look like page visits.

    Each row supplies a record name in ``row[0]`` and raw request text in
    ``row[1]``.  The text is normalised (tabs removed, newlines replaced by
    ``;;``) and parsed with ``rexurlg``; rows that fail at any step are
    skipped.  Only requests with a non-empty ``Get`` and a user agent equal
    to ``ua`` are considered.

    Returns a list of unique URLs (order unspecified).
    """
    refs = {}     # Ref -> number of requests naming it as referer
    get01s = {}   # Get -> number of requests carrying no referer
    getref = {}   # Get -> referer of the last request for that Get
    for row in results:
        try:
            # The value is not used, but rows whose first field cannot be
            # encoded and split are still rejected, as they were before.
            row[0].encode("utf-8").split("|")[0].strip()
            url = row[1].encode("utf-8")
            surl = url.replace('\t', '').replace('\n', ';;')
            Get, Host, Ref, UAgent, Cookie = rexurlg(surl)
            if UAgent != ua or len(Get) == 0:
                continue
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
                continue
            Ref = Ref.strip()
            if http(Get) != 0:
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            # Best effort: a malformed row is skipped.  Unlike the former
            # bare ``except:``, KeyboardInterrupt and SystemExit propagate.
            continue

    # Referer-less requests that are also named as a referer elsewhere.
    userurl0 = []
    for get0 in get01s:
        get0 = get0.strip()
        if get0 in refs:
            userurl0.append(get0)
    us = userurl0[:]

    refss = fitref(refs)

    # For every frequent referer that was itself requested, remember the
    # referer of that request (userurl1) and which request led to it.
    userurl1 = set()
    userurl1GR = {}
    for ref in refss:
        ref = ref.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref != getr:
                continue
            userurl1GR[gref1] = getr
            if gref1 in userurl1:
                # An already collected referer ends the scan for this ref.
                break
            userurl1.add(gref1)

    userurl0.extend(refss.keys())
    realurl = [url0.strip() for url0 in userurl0 if url0.strip() in userurl1]

    # Add the requests that led to a confirmed URL, unless they are
    # confirmed themselves or rejected by back().
    confirmed = set(realurl)
    URL = []
    for urlgref, rget in userurl1GR.items():
        if urlgref in confirmed and rget not in confirmed:
            if back(rget) != 0:
                URL.append(rget)

    URL.extend(us)
    realurl.extend(URL)
    return list(set(realurl))
Esempio n. 13
0
    isGet=re.compile(r'GET(.+?);')
    Get=re.findall(isGet,url)
    Get="".join(Get)
    #print Get
    #print fname,type(Get)
    #break
    isHost=re.compile(r'Host:(.+?);')
    Host=re.findall(isHost,url)
    #print Host
    isReferer=re.compile(r'Referer:(.+?);')
    Referer=re.findall(isReferer,url)
    #print Referer
    isUser=re.compile(r'User-Agent:(.+?)\);')
    UserAgent=re.findall(isUser,url)
    #print UserAgent
    a=back(Get)
    #print a
    #break
    if a==0:
      bb=bb+1
      #cursor.execute("insert into backurl (fname,url) values (%s,%s)",[fname,Get])

      #print fname, Get
      #cursor.execute("delete from gethttp where fname='%s'" %fname)
      #conn.commit()
    else:
      aa=aa+1
      b=soft(Get)
      if b==0:
        b2=b2+1
        print fname,Get
Esempio n. 14
0
def usermt1(urls01, urls11, Realua):
    """Pick the URLs requested by ``Realua`` that look like real page visits.

    ``urls01`` and ``urls11`` map a record name to raw request text; on a
    key clash the entry from ``urls11`` wins.  Only requests that chardet
    classifies as ``ascii`` are parsed with ``rexurl``, and only those with
    a non-empty ``Get`` whose user agent equals ``Realua`` are considered.

    Prints the number of URLs found and returns them as a list of unique
    URLs (order unspecified).
    """
    refs = {}     # Ref -> number of requests naming it as referer
    get01s = {}   # Get -> number of requests carrying no referer
    getref = {}   # Get -> referer of the last request for that Get

    urls = dict(urls01)
    urls.update(urls11)
    for fname in urls:
        urls1 = urls[fname]
        typ = chardet.detect(urls1)['encoding']
        # chardet reports None when it cannot guess an encoding; the old
        # unconditional typ.strip() crashed on such input.
        if typ is None or typ.strip() != "ascii":
            continue
        try:
            Get, Host, Ref, UAgent, Cookie = rexurl(urls1)
            if len(Get) == 0 or UAgent != Realua:
                continue
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
            else:
                Ref = Ref.strip()
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            # Best effort: an unparsable request is skipped.  Unlike the
            # former bare ``except:``, KeyboardInterrupt still propagates.
            continue

    # Referer-less requests that are also named as a referer elsewhere.
    userurl0 = []
    for get0 in get01s:
        get0 = get0.strip()
        if get0 in refs and back(get0) != 0:
            userurl0.append(get0)
    us = userurl0[:]

    # For every frequent referer that was itself requested, remember the
    # referer of that request.
    refss = fitref(refs)
    userurl1 = set()
    for ref in refss:
        ref = ref.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref == getr and back(getr) != 0:
                if gref1 in userurl1:
                    # An already collected referer ends the scan.
                    break
                userurl1.add(gref1)

    userurl0.extend(refs.keys())
    realurl = [url0.strip() for url0 in userurl0 if url0.strip() in userurl1]
    realurl.extend(us)
    realurls = list(set(realurl))

    print('first judge realurls: %d' % len(realurls))
    return realurls
Esempio n. 15
0
def maxtime2(gettimes,realurl):
    """Find the most recent request time among the URLs in ``realurl``.

    ``gettimes`` maps a URL to a time string in ``%Y/%m/%d %H:%M:%S``
    format.  Only URLs that are listed in ``realurl`` (after stripping) and
    for which ``back(url) != 0`` take part.

    Returns a one-entry dict ``{time_string: url}`` for the latest time, or
    an empty dict when no URL qualifies.

    The previous implementation returned before comparing the last
    qualifying entry, raised KeyError when no time was later than a
    hard-coded 2012 date, and returned None whenever some ``realurl``
    entry had no time recorded.
    """
    time_format = "%Y/%m/%d %H:%M:%S"
    wanted = set(realurl)
    maxT = {}
    latest = None
    for ti in gettimes:
        tis = ti.strip()
        if tis not in wanted or back(tis) == 0:
            continue
        ftime = gettimes[ti].strip()
        stamp = datetime.datetime.strptime(ftime, time_format)
        # Strict comparison: on equal times the first entry seen is kept.
        if latest is None or latest < stamp:
            latest = stamp
            maxT = {ftime: tis}
            print(ftime)
    return maxT
Esempio n. 16
0
# -*- coding: UTF-8 -*-
#Author: Li Mengxing
#Description: main entry point of the program
import time
from transitions import Machine
from surround import surround
from back import back
from land import land
from launch import launch
#from surround import transitions, states

# model = surround()
# model2 = back()
# Create one instance of each of the four imported classes.
surrounder = surround()
backer = back()
lander = land()
launcher = launch()

# State definitions.
status = ['READY','WAITING','WORKING','DONE']

# State transitions: READY -> WAITING -> WORKING -> DONE -> READY.
# NOTE(review): 'wating_over' looks like a typo for 'waiting_over'; it is a
# runtime trigger name, so confirm against its users before renaming it.
transitions = [
    {'trigger': 'initialed', 'source': 'READY', 'dest': 'WAITING' },
    {'trigger': 'wating_over', 'source': 'WAITING', 'dest': 'WORKING' },
    {'trigger': 'work_done', 'source': 'WORKING', 'dest': 'DONE' },
    {'trigger': 'ready','source':'DONE','dest':'READY'}
]

# Set up a state machine for each of the instances.
Esempio n. 17
0
def usermt(urls01, urls11, Realua):
    """Pick the URLs requested by ``Realua`` that look like real page visits.

    ``urls01`` and ``urls11`` map a record name to raw request text; on a
    key clash the entry from ``urls11`` wins.  Each request is parsed with
    ``rexurl`` and only requests with a non-empty ``Get`` whose user agent
    equals ``Realua`` are considered.  The name of any record that fails
    to parse is printed and the record is skipped.

    Prints two progress counters and returns a list of unique URLs (order
    unspecified).

    The former chardet detection was dropped: its result was never used,
    and ``typ.strip()`` crashed when chardet reported no encoding.
    """
    refs = {}     # Ref -> number of requests naming it as referer
    get01s = {}   # Get -> number of requests carrying no referer
    getref = {}   # Get -> referer of the last request for that Get

    urls = dict(urls01)
    urls.update(urls11)
    for fname in urls:
        urls1 = urls[fname]
        try:
            Get, Host, Ref, UAgent, Cookie = rexurl(urls1)
            if len(Get) == 0 or UAgent != Realua:
                continue
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
            else:
                Ref = Ref.strip()
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            # Best effort: report and skip.  Unlike the former bare
            # ``except:``, KeyboardInterrupt and SystemExit propagate.
            print(fname)
            continue

    # Referer-less requests that are also named as a referer elsewhere.
    userurl0 = []
    for get0 in get01s:
        get0 = get0.strip()
        if get0 in refs and back(get0) != 0:
            userurl0.append(get0)
    us = userurl0[:]
    print("len(us): %d" % len(us))

    # For every frequent referer that was itself requested, remember the
    # referer of that request.
    refss = fitref(refs)
    userurl1 = set()
    for ref in refss:
        ref = ref.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref == getr and back(getr) != 0:
                if gref1 in userurl1:
                    # An already collected referer ends the scan.
                    break
                userurl1.add(gref1)

    userurl0.extend(refs.keys())
    realurl = [url0.strip() for url0 in userurl0 if url0.strip() in userurl1]
    realurl.extend(us)
    realurls = list(set(realurl))

    print('kkk: %d' % len(realurls))
    return realurls

    # for get1 in getref.keys():
    #     gref1=getref[get1]
    #     gref1=gref1.strip()
    #     #print "gref1",gref1
    #     if get0==gref1 and get0 in refs.keys():
    #         #print get0
    #         if get0 not in userurl0:
    #             userurl0.append(get0)
    #             userurl1.extend(userurl0)
    #             #print userurl1
    #     if getu(get1,refs,userurl1)==0:
    #         userurl1.append(get1)
    # print refs.keys()
    # print "aas:",aas
    # print "len(get01s):",len(get01s)
    # print "len(refs):", len(refs)
    # # check(fitref(refs))
    # # print refs
    # refss = fitref(refs)  # simpile fittler T>=10
    # # print "len(refss):", len(refss)
Esempio n. 18
0
def usermt11(urls01, urls11, Realua):
    """Pick likely page visits of ``Realua`` and record when they occurred.

    ``urls01`` and ``urls11`` map ``"<stamp>|..."`` record names to raw
    request text; on a key clash the entry from ``urls11`` wins.  The part
    of the name before the first ``|`` is kept as the request's stamp
    (presumably a timestamp -- it is only ever handed to ``Caltime``).
    Only requests that chardet classifies as ``ascii``, with a non-empty
    ``Get`` and a user agent equal to ``Realua``, are considered.  A repeat
    request for a URL is ignored unless ``Caltime(previous, current)``
    exceeds 120.

    Prints the number of URLs found and returns ``(realurls, gettimes)``:
    a list of unique URLs (order unspecified) and a dict mapping every
    accepted ``Get`` to the stamp of its last accepted request.
    """
    refs = {}       # Ref -> number of requests naming it as referer
    get01s = {}     # Get -> number of requests carrying no referer
    getref = {}     # Get -> referer of the last request for that Get
    gettimes = {}   # Get -> stamp of the last accepted request

    urls = dict(urls01)
    urls.update(urls11)
    for fnames in urls:
        fname = fnames.split("|")[0].strip()
        urls1 = urls[fnames]
        typ = chardet.detect(urls1)['encoding']
        # chardet reports None when it cannot guess an encoding; the old
        # unconditional typ.strip() crashed on such input.
        if typ is None or typ.strip() != "ascii":
            continue
        try:
            Get, Host, Ref, UAgent, Cookie = rexurl(urls1)
            if len(Get) == 0 or UAgent != Realua:
                continue
            if Get in gettimes and not Caltime(gettimes[Get], fname) > 120:
                # Repeat request too close to the previous one: ignore it.
                continue
            gettimes[Get] = fname
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
            else:
                Ref = Ref.strip()
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            # Best effort: an unparsable request is skipped.  Unlike the
            # former bare ``except:``, KeyboardInterrupt still propagates.
            continue

    # Referer-less requests that are also named as a referer elsewhere.
    userurl0 = []
    for get0s in get01s:
        get0 = get0s.strip()
        if get0 in refs and back(get0) != 0:
            userurl0.append(get0)
    us = userurl0[:]

    # For every frequent referer that was itself requested, remember the
    # referer of that request.
    refss = fitref(refs)
    userurl1 = set()
    for ref1 in refss:
        ref = ref1.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref == getr and back(getr) != 0:
                if gref1 in userurl1:
                    # An already collected referer ends the scan.
                    break
                userurl1.add(gref1)

    userurl0.extend(refs.keys())
    realurl = [url0.strip() for url0 in userurl0 if url0.strip() in userurl1]
    realurl.extend(us)
    realurls = list(set(realurl))

    print('kkk: %d' % len(realurls))
    return realurls, gettimes
Esempio n. 19
0
def usermt11(urls01, urls11, Realua):
    """Pick likely page visits of ``Realua`` and record when they occurred.

    ``urls01`` and ``urls11`` map ``"<stamp>|..."`` record names to raw
    request text; on a key clash the entry from ``urls11`` wins.  The part
    of the name before the first ``|`` is kept as the request's stamp
    (presumably a timestamp -- it is only ever handed to ``Caltime``).
    Only requests that chardet classifies as ``ascii``, with a non-empty
    ``Get`` and a user agent equal to ``Realua``, are considered.  A repeat
    request for a URL is ignored unless ``Caltime(previous, current)``
    exceeds 120.

    Prints the number of URLs found and returns ``(realurls, gettimes)``:
    a list of unique URLs (order unspecified) and a dict mapping every
    accepted ``Get`` to the stamp of its last accepted request.
    """
    refs = {}       # Ref -> number of requests naming it as referer
    get01s = {}     # Get -> number of requests carrying no referer
    getref = {}     # Get -> referer of the last request for that Get
    gettimes = {}   # Get -> stamp of the last accepted request

    urls = dict(urls01)
    urls.update(urls11)
    for fnames in urls:
        fname = fnames.split("|")[0].strip()
        urls1 = urls[fnames]
        typ = chardet.detect(urls1)['encoding']
        # chardet reports None when it cannot guess an encoding; the old
        # unconditional typ.strip() crashed on such input.
        if typ is None or typ.strip() != "ascii":
            continue
        try:
            Get, Host, Ref, UAgent, Cookie = rexurl(urls1)
            if len(Get) == 0 or UAgent != Realua:
                continue
            if Get in gettimes and not Caltime(gettimes[Get], fname) > 120:
                # Repeat request too close to the previous one: ignore it.
                continue
            gettimes[Get] = fname
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
            else:
                Ref = Ref.strip()
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            # Best effort: an unparsable request is skipped.  Unlike the
            # former bare ``except:``, KeyboardInterrupt still propagates.
            continue

    # Referer-less requests that are also named as a referer elsewhere.
    userurl0 = []
    for get0s in get01s:
        get0 = get0s.strip()
        if get0 in refs and back(get0) != 0:
            userurl0.append(get0)
    us = userurl0[:]

    # For every frequent referer that was itself requested, remember the
    # referer of that request.
    refss = fitref(refs)
    userurl1 = set()
    for ref1 in refss:
        ref = ref1.strip()
        for getr in getref:
            gref1 = getref[getr]
            getr = getr.strip()
            if ref == getr and back(getr) != 0:
                if gref1 in userurl1:
                    # An already collected referer ends the scan.
                    break
                userurl1.add(gref1)

    userurl0.extend(refs.keys())
    realurl = [url0.strip() for url0 in userurl0 if url0.strip() in userurl1]
    realurl.extend(us)
    realurls = list(set(realurl))

    print('kkk: %d' % len(realurls))
    return realurls, gettimes

    # for get1 in getref.keys():
    #     gref1=getref[get1]
    #     gref1=gref1.strip()
    #     #print "gref1",gref1
    #     if get0==gref1 and get0 in refs.keys():
    #         #print get0
    #         if get0 not in userurl0:
    #             userurl0.append(get0)
    #             userurl1.extend(userurl0)
    #             #print userurl1
    #     if getu(get1,refs,userurl1)==0:
    #         userurl1.append(get1)
    # print refs.keys()
    # print "aas:",aas
    # print "len(get01s):",len(get01s)
    # print "len(refs):", len(refs)
    # # check(fitref(refs))
    # # print refs
    # refss = fitref(refs)  # simpile fittler T>=10
    # # print "len(refss):", len(refss)
Esempio n. 20
0
def maxtime2(gettimes, realurl):
    """Return the most recent timestamp among the URLs listed in ``realurl``.

    Args:
        gettimes: mapping of URL -> timestamp string in the format
            ``"%Y/%m/%d %H:%M:%S"``. Keys and values are stripped of
            surrounding whitespace before they are used.
        realurl: collection of URLs to take into account.

    Returns:
        A one-entry dict ``{timestamp_string: url}`` holding the latest
        timestamp found among the entries of ``gettimes`` whose URL is in
        ``realurl`` and for which ``back(url) != 0``. An empty dict is
        returned when no entry qualifies.

    Raises:
        ValueError: if the timestamp of a qualifying entry does not match
            the expected format.
    """
    time_format = "%Y/%m/%d %H:%M:%S"
    wanted = set(realurl)  # O(1) membership instead of scanning the list

    # The running maximum starts out empty rather than at a fixed seed date:
    # a seed made every older timestamp lose the comparison and caused a
    # KeyError when the seed itself was looked up as a result.
    latest_date = None
    latest_time = None
    latest_url = None

    for raw_url, raw_time in gettimes.items():
        ftime = raw_time.strip()
        tis = raw_url.strip()
        if back(tis) != 0 and tis in wanted:
            parsed = datetime.datetime.strptime(ftime, time_format)
            # Every qualifying entry is compared, including the last one, and
            # the result no longer depends on how many entries matched.
            if latest_date is None or latest_date < parsed:
                latest_date = parsed
                latest_time = ftime
                latest_url = tis

    if latest_time is None:
        return {}
    return {latest_time: latest_url}
Esempio n. 21
0
        # Fragment of a loop body: `url`, `fname` and the counters `aa`, `bb`
        # and `b2` are defined by enclosing code that is not part of this
        # excerpt (presumably `url` is the raw request text - confirm).

        # Capture the text between each "GET" and the next ";" (non-greedy).
        isGet = re.compile(r'GET(.+?);')
        Get = re.findall(isGet, url)
        # findall returns a list; glue all captures into one string.
        Get = "".join(Get)
        #print Get
        #print fname,type(Get)
        #break
        # Host, Referer and UserAgent are extracted the same way but stay
        # lists of captures and are not used further in this fragment.
        isHost = re.compile(r'Host:(.+?);')
        Host = re.findall(isHost, url)
        #print Host
        isReferer = re.compile(r'Referer:(.+?);')
        Referer = re.findall(isReferer, url)
        #print Referer
        # The User-Agent capture stops before the first ");", so the closing
        # parenthesis is not part of the captured text.
        isUser = re.compile(r'User-Agent:(.+?)\);')
        UserAgent = re.findall(isUser, url)
        #print UserAgent
        # `back` is defined elsewhere; a result of 0 is counted in `bb`,
        # anything else in `aa`.
        a = back(Get)
        #print a
        #break
        if a == 0:
            bb = bb + 1
            #cursor.execute("insert into backurl (fname,url) values (%s,%s)",[fname,Get])

            #print fname, Get
            #cursor.execute("delete from gethttp where fname='%s'" %fname)
            #conn.commit()
        else:
            aa = aa + 1
            # `soft` is defined elsewhere; a result of 0 is counted in `b2`
            # and the entry is printed.
            b = soft(Get)
            if b == 0:
                b2 = b2 + 1
                print fname, Get
Esempio n. 22
0
def ReURL(ua, results):
    """Select candidate URLs from the requests sent with user agent ``ua``.

    Each row is parsed with ``rexurlg`` into ``(Get, Host, Ref, UAgent,
    Cookie)``. Only rows whose ``UAgent`` equals ``ua`` and whose ``Get`` is
    non-empty are used:

    * requests without a referer are counted per GET target;
    * requests with a referer are kept when ``http(Get) != 0``: the referer
      is recorded for the GET target and counted.

    The result is the de-duplicated union of

    1. referer-less GET targets that also occur as a referer,
    2. candidates (group 1 plus the keys of ``fitref(refs)``) that are the
       referer of a request whose GET target is itself a key of
       ``fitref(refs)``,
    3. the GET targets of those requests, when their referer is in group 2,
       they are not in group 2 themselves and ``back(target) != 0``.

    ``rexurlg``, ``http``, ``fitref`` and ``back`` are defined elsewhere in
    this file; nothing is assumed about them beyond how they are called here.

    Args:
        ua: user-agent value a request must carry to be considered.
        results: iterable of rows; ``row[0]`` and ``row[1]`` must support
            ``.encode("utf-8")``, ``row[1]`` is the text handed to
            ``rexurlg`` after tabs are removed and newlines become ``;;``.

    Returns:
        A list of unique URLs in no particular order.
    """
    refs = {}    # referer -> number of kept requests carrying it
    get01s = {}  # GET target of a referer-less request -> occurrence count
    getref = {}  # GET target -> referer of the last kept request for it

    for row in results:
        try:
            # The encoded name is not used, but a row whose first column
            # cannot be encoded keeps being skipped like any other bad row.
            row[0].encode("utf-8")
            url = row[1].encode("utf-8")
            surl = url.replace('\t', '').replace('\n', ';;')
            Get, _host, Ref, UAgent, _cookie = rexurlg(surl)
            if UAgent != ua or len(Get) == 0:
                continue
            if len(Ref) == 0:
                get01s[Get] = get01s.get(Get, 0) + 1
                continue
            Ref = Ref.strip()
            if http(Get) != 0:
                getref[Get] = Ref
                refs[Ref] = refs.get(Ref, 0) + 1
        except Exception:
            # Best effort: a malformed row is skipped. Unlike a bare
            # ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
            continue

    # Group 1: referer-less GET targets that other requests name as referer.
    us = []
    for get0 in get01s:
        get0 = get0.strip()
        if get0 in refs:
            us.append(get0)
    userurl0 = us[:]

    # Called only after group 1 is built, exactly as before, so the outcome
    # does not depend on whether fitref touches ``refs``.
    refss = fitref(refs)

    userurl1 = set()  # referers of requests whose target is a key of refss
    userurl1GR = {}   # referer -> (stripped) GET target of such a request
    for ref in refss.keys():
        ref = ref.strip()
        for getr, gref1 in getref.items():
            getr = getr.strip()
            if ref != getr:
                continue
            userurl1GR[gref1] = getr
            if gref1 in userurl1:
                # Preserved behaviour: stop scanning for this ref as soon as
                # its referer turns out to be recorded already.
                break
            userurl1.add(gref1)

    # Group 2: candidates that are the referer of such a request.
    userurl0.extend(refss.keys())
    realurl = []
    for url0 in userurl0:
        url0 = url0.strip()
        if url0 in userurl1:
            realurl.append(url0)

    # Group 3: targets reached from a group-2 referer that pass ``back``.
    confirmed = set(realurl)
    URL = []
    for urlgref, rget in userurl1GR.items():
        if urlgref in confirmed and rget not in confirmed:
            if back(rget) != 0:
                URL.append(rget)

    URL.extend(us)
    realurl.extend(URL)
    return list(set(realurl))