def do_img(self, attrs):
    """Handle an <img> tag by mirroring its ``srcreal`` target into the store.

    If the tag carries a ``srcreal`` attribute, the resource at that URL is
    downloaded and written to the store entry of the same name under
    ``self._root``. Tags without ``srcreal`` are ignored.

    attrs -- the tag's attributes, in whatever form ``self.getAtt`` accepts.

    Errors raised by ``urllib.urlopen``, ``Store.storeopen`` or the
    read/write calls propagate to the caller; both handles are closed
    regardless.
    """
    srcreal = self.getAtt(attrs, 'srcreal')
    if srcreal is None:
        return

    # Fetch the whole payload before touching the store, so that a failed
    # download does not leave a freshly created, empty entry behind.
    remote = urllib.urlopen(srcreal)
    try:
        data = remote.read()
    finally:
        remote.close()

    # try/finally rather than ``with``: the handle returned by storeopen is
    # not known to be a context manager.
    out = Store.storeopen(srcreal, self._root, 'w')
    try:
        out.write(data)
    finally:
        out.close()
def __init__(self, page):
    """Parse ``page`` immediately, recording results on this instance.

    Construction initialises the underlying ``htmllib.HTMLParser`` with a
    ``NullFormatter``, installs fallback handlers for tags listed in
    ``newhtmlelements`` / ``newhtmlelementsempty``, opens the output store
    entry for the page's URL, and then feeds the page body through the
    parser and closes it.

    page -- object providing ``_httpRoot``, ``root()``, ``url()``,
            ``siteRoot()`` and ``body()``.
    """
    htmllib.HTMLParser.__init__(self, formatter.NullFormatter())

    # turn on elements that weren't in HTML 2
    for tag in newhtmlelements.split():
        # Container elements get a start/end handler pair.
        self.setifnot('start_' + tag, self.fake_start)
        self.setifnot('end_' + tag, self.fake_end)
    for empty_tag in newhtmlelementsempty.split():
        # Empty elements only need a single do_ handler.
        self.setifnot('do_' + empty_tag, self.fake_start)

    # Page context used while parsing.
    self._page = page
    self._httpRoot = page._httpRoot
    self._root = page.root()

    # Output entry for this page, opened under the page's root.
    self._ofp = Store.storeopen(page.url(), self._root, 'w')

    # Collections start empty; presumably filled by tag handlers during
    # feed() -- those handlers are not visible here.
    self._anchors = {}
    self._forms = {}
    self._frames = {}

    self._siteRoot = page.siteRoot()
    self._url = page.url()

    # Run the parse to completion as part of construction.
    self.feed(page.body())
    self.close()