def imit(self, url):
    """Fetch *url*, extract its outbound links, keep only same-domain ones.

    If ``self.keyword`` is non-empty the links come from
    ``self.search(html, keyword)``; otherwise every ``http://...`` match in
    the page is collected. Links whose domain differs from *url*'s domain
    are dropped. The surviving links are deduplicated into ``self.s1``
    (presumably ``l_del_sa`` removes duplicates/blanks — TODO confirm
    against the Slist implementation).

    Returns ``self.s1.li2`` (the resulting link list); on any fetch/parse
    error an empty list is pushed into ``self.s1`` instead.
    """
    date = Slist()
    adom = domain(url)
    try:
        page = urllib2.urlopen(url)
        html = page.read()
        if self.keyword != '':
            date.put(self.search(html, self.keyword))
        else:
            # re.S belongs in compile(); the original passed it to
            # findall() as the *pos* argument, skipping the first 16 bytes.
            url_re = re.compile(r'http://[\.\w//]+', re.S)
            date.put(url_re.findall(html))
        date.l_del_sa()
        # Iterate over a snapshot: the original re-scanned the live list in
        # three nested loops to paper over deleting during iteration.
        for link in list(date.li2):
            if domain(link) != adom:
                date.l_del_da(link)
        self.s1.put(date.li2)
        self.s1.l_del_sa()
    except Exception:
        # Best-effort: any network/parse failure yields an empty result,
        # matching the original bare-except behavior (but no longer
        # swallowing KeyboardInterrupt/SystemExit).
        self.s1.put([])
    return self.s1.li2
def url_parse(url):
    """Fetch *url* and return its same-domain outbound links.

    Side effects: rebinds the globals ``nadom`` and ``nerror`` to
    thread/error tags derived from the url's domain and the global ``ct``,
    and logs each discarded cross-domain link via ``s.s_wirte(nerror, ...)``
    (project API; name kept as-is, including the apparent typo).

    If the global ``want`` is nonzero, links come from ``search(html, want)``;
    otherwise every ``http://...`` match in the page is collected.

    Returns the filtered link list, or ``0`` on any fetch/parse error
    (callers test for the sentinel, so the error-code return is preserved).
    """
    global nadom, nerror
    adom = domain(url)
    nadom = 'thread_' + str(adom) + '_' + str(ct)
    nerror = 'error_' + str(adom) + '_' + str(ct)
    try:
        page = urllib2.urlopen(url)
        html = page.read()
        if want != 0:
            par_url = search(html, want)
        else:
            # re.S belongs in compile(); the original passed it to
            # findall() as the *pos* argument, skipping the first 16 bytes.
            url_re = re.compile(r'http://[\.\w//]+', re.S)
            par_url = url_re.findall(html)
        urld = Slist()
        urld.put(par_url)
        urld.l_del_sa()
        # Iterate over a snapshot: the original re-scanned the live list in
        # three nested loops to paper over deleting during iteration.
        for link in list(urld.li2):
            if domain(link) != adom:
                urld.l_del_da(link)
                s.s_wirte(nerror, link)
        return urld.li2
    except Exception:
        return 0