def end(self, downer):
    """Queue each parsed link for download, stopping at the first URL
    already present in ``downer.history``.

    NOTE(review): the early ``break`` (not ``continue``) assumes links are
    ordered newest-first, so hitting a known URL means the rest were
    already fetched — confirm against the feed/page layout.
    """
    for href, title in self.parse():
        absolute = abs_href(href, self.url)
        if absolute in downer.history:
            break  # everything after this point was presumably saved before
        # '/' in the title would create unintended subdirectories
        target = "%s/%s" % (self.path, title.replace('/', '_'))
        downer.add(self.next_saver(absolute, target))
def end(self, downer):
    """Queue every parsed link for download (no duplicate filtering).

    Each (url, name) pair from ``self.parse()`` becomes a saver created by
    ``self.next_saver`` and handed to ``downer.add``.
    """
    for href, title in self.parse():
        safe_name = title.replace('/', '_')  # avoid path separators in filenames
        saver = self.next_saver(
            abs_href(href, self.url),
            "%s/%s" % (self.path, safe_name),
        )
        downer.add(saver)
def img_saver(downer, url, html, url_prefix, base_dir=join("img")):
    """Rewrite <img> tags in *html* to point at local copies and queue
    each image for download.

    Parameters:
        downer: download manager; ``downer.add(url, path)`` queues a fetch.
        url: URL of the page the HTML came from; base for relative srcs.
        html: raw HTML text of that page.
        url_prefix: prefix substituted into each rewritten ``src``.
        base_dir: local directory the images are saved under.

    Returns the modified HTML as a string.
    """
    soup = BeautifulSoup(html)
    for tag in soup.findAll('img'):
        src = tag.get('src', None)
        if not src:
            continue
        # BUG FIX: bind the resolved URL to a fresh name. The original
        # rebound ``url``, so every later relative src was resolved
        # against the previous image's URL instead of the page URL.
        img_url = abs_href(src, url)
        # NOTE(review): assumes the resolved URL always contains '://';
        # a scheme-less URL would raise IndexError here — confirm.
        ref = img_url.split('://')[1]
        tag['src'] = join(url_prefix, ref)
        downer.add(img_url, join(base_dir, ref))
    return str(soup)
def end(self, downer):
    """Hand each new parsed link to the downloader; abort the whole loop
    on the first URL found in ``downer.history``.

    NOTE(review): ``break`` (rather than skipping just the duplicate)
    presumably relies on newest-first ordering — confirm with callers.
    """
    for href, label in self.parse():
        link = abs_href(href, self.url)
        if link in downer.history:
            break  # already fetched; assume the remainder were too
        downer.add(
            self.next_saver(link, "%s/%s" % (self.path, label.replace('/', '_')))
        )
def img_saver(downer, url, html, url_prefix, base_dir=join("img")):
    """Localize every <img> in *html*: rewrite its ``src`` under
    *url_prefix* and queue the original image for download via *downer*.

    Parameters:
        downer: download manager exposing ``add(url, local_path)``.
        url: URL of the source page; base used to resolve relative srcs.
        html: raw HTML text.
        url_prefix: prefix placed before the host/path in rewritten srcs.
        base_dir: local directory prepended to each saved image path.

    Returns the rewritten HTML as a string.
    """
    soup = BeautifulSoup(html)
    for img in soup.findAll('img'):
        src = img.get('src', None)
        if not src:
            continue
        # BUG FIX: do not rebind ``url`` — the original made each
        # iteration resolve relative srcs against the previous image's
        # URL instead of the page URL.
        resolved = abs_href(src, url)
        # NOTE(review): splitting on '://' assumes a scheme is always
        # present; otherwise IndexError — confirm abs_href guarantees it.
        ref = resolved.split('://')[1]
        img['src'] = join(url_prefix, ref)
        downer.add(resolved, join(base_dir, ref))
    return str(soup)
def end(self, downer):
    """Queue a saver for every parsed (url, name) pair — no history check.

    Names have '/' replaced with '_' so they stay single path components
    under ``self.path``.
    """
    for href, title in self.parse():
        absolute = abs_href(href, self.url)
        destination = "%s/%s" % (self.path, title.replace('/', '_'))
        downer.add(self.next_saver(absolute, destination))