def main(): global b, o, ct, w, s, urlq, want b = ctime() bt = b.split() ct = bt[1] + bt[2] urlq = Slist() s = Save() u = raw_input('Please input the url:') t = int(raw_input('Please input the number of thread you want: ')) n = int(raw_input('Please input the depth you want: ')) w = raw_input('Do you want do search some urls you want?(y/n): ') if w == 'y': want = raw_input('What do you want to search?: ') elif w == 'n': want = 0 else: want = 0 url = 'http://' + u m = url_parse(url) urlq.put(m) urlq.l_del_sa() for i in m: s.s_wirte(nadom, i) thread_list = [] for i in range(t): thread_list.append(Thread(target=forward,args=(n,))) for thread in thread_list: thread.start()
def search(self, page, keyword):
    """Extract from *page* every http:// URL whose path contains *keyword*.

    Returns the de-duplicated list produced by a temporary Slist.
    """
    search_list = Slist()
    # Match http URLs that contain the keyword somewhere in their path.
    what = r'(http://[\.\w\s\d//-=/?]*' + str(keyword) + r'+/*\d*[^>\'\"<]*)'
    search_re = re.compile(what, re.S)
    # BUG FIX: a compiled pattern's findall() signature is (string, pos, endpos);
    # passing re.S (== 16) as pos silently skipped the first 16 characters.
    search_get = search_re.findall(page)
    search_list.put(search_get)
    search_list.l_del_sa()  # de-duplicates into .li2 -- defined elsewhere
    return search_list.li2
def search(page, date):
    """Extract from *page* every http:// URL whose path contains *date*.

    Returns the de-duplicated list produced by a temporary Slist.
    """
    search_list = Slist()
    # e.g. http://[\.\w\s\d//-=/?]*(vuls)+/*\d*[^><'"]*
    what = r'(http://[\.\w\s\d//-=/?]*' + str(date) + r'+/*\d*[^>\'\"<]*)'
    search_re = re.compile(what, re.S)
    # BUG FIX: a compiled pattern's findall() signature is (string, pos, endpos);
    # passing re.S (== 16) as pos silently skipped the first 16 characters.
    search_get = search_re.findall(page)
    search_list.put(search_get)
    search_list.l_del_sa()  # de-duplicates into .li2 -- defined elsewhere
    return search_list.li2
def url_parse(url):
    """Fetch *url*, extract its links, discard (and log) off-domain ones.

    Side effects: sets the global output file names ``nadom``/``nerror`` and
    writes each dropped off-domain link via ``s.s_wirte``.
    Returns the list of same-domain links, or 0 on any failure.
    """
    global nadom, nerror
    adom = domain(url)
    nadom = 'thread_' + str(adom) + '_' + str(ct)
    nerror = 'error_' + str(adom) + '_' + str(ct)
    try:
        page = urllib2.urlopen(url)
        html = page.read()
        if want != 0:
            par_url = search(html, want)
        else:
            url_re = re.compile(r'http://[\.\w//]+', re.S)
            # BUG FIX: findall(html, re.S) passed re.S as the pos argument,
            # skipping the first 16 characters of the page.
            par_url = url_re.findall(html)
        urld = Slist()
        urld.put(par_url)
        urld.l_del_sa()
        # Iterate over a snapshot: l_del_da mutates the underlying list, and
        # the original triple-nested loop over the same list was redundant.
        for link in list(urld.li2):
            if domain(link) != adom:
                urld.l_del_da(link)
                s.s_wirte(nerror, link)
        return urld.li2
    except Exception:
        # Best-effort contract kept from the original: any failure -> 0.
        return 0
class WSR(object): isearch = raw_input('Need search?(y/n)') if isearch == 'y': keyword = raw_input('Keyword(only support english):') elif isearch == 'n': keyword = '' else: keyword = '' def __init__(self): self.s1 = Slist() def imit(self,url): date = Slist() adom = domain(url) try: page = urllib2.urlopen(url) html = page.read() if self.keyword != '': date.put(self.search(html,self.keyword)) date.l_del_sa() else: url_re = re.compile(r'http://[\.\w//]+',re.S) date.put(url_re.findall(html,re.S)) date.l_del_sa() for i in date.li2: for i in date.li2: for i in date.li2: dom = domain(i) if dom != adom: date.l_del_da(i) self.s1.put(date.li2) self.s1.l_del_sa() except: self.s1.put([]) return self.s1.li2 def go(self,n): while n > 0: u = self.s1.get() aa = self.imit(u) if aa == 0: self.go(n) else: self.s1.put(aa) self.s1.l_del_sa() n = n -1 self.go(n) else: print 'Done' def search(self,page,keyword): search_list = Slist() what = r'(http://[\.\w\s\d//-=/?]*' + str(keyword) + r'+/*\d*[^>\'\"<]*)' search_re = re.compile(what, re.S) search_get = search_re.findall(page, re.S) search_list.put(search_get) search_list.l_del_sa() return search_list.li2
def imit(self, url):
    """Fetch *url*, collect its links, drop off-domain ones, merge into s1.

    Returns the updated ``s1.li2`` list (also returned after a failed fetch,
    in which case an empty batch is recorded).
    """
    date = Slist()
    adom = domain(url)
    try:
        page = urllib2.urlopen(url)
        html = page.read()
        if self.keyword != '':
            date.put(self.search(html, self.keyword))
            date.l_del_sa()
        else:
            url_re = re.compile(r'http://[\.\w//]+', re.S)
            # BUG FIX: findall(html, re.S) passed re.S as the pos argument,
            # skipping the first 16 characters of the page.
            date.put(url_re.findall(html))
            date.l_del_sa()
        # Iterate over a snapshot: l_del_da mutates the list, and the
        # original triple-nested loop over the same list was redundant.
        for link in list(date.li2):
            if domain(link) != adom:
                date.l_del_da(link)
        self.s1.put(date.li2)
        self.s1.l_del_sa()
    except Exception:
        # Best-effort: on any failure just record an empty batch.
        self.s1.put([])
    return self.s1.li2
def __init__(self):
    # s1 holds the working queue/list of URLs for this crawler instance.
    self.s1 = Slist()