def __init__(self, charset):
    """Set up the instance with an encoder for `charset`.

    The callable returned by `getencoder(charset)` is stored on
    `self.encoder`.
    """
    # NOTE(review): `getencoder` comes from outside this block; presumably
    # `codecs.getencoder` -- confirm against the file's imports.
    self.encoder = getencoder(charset)
# NOTE(review): this line repeats the body of the
# `if __name__ == '__main__':` block found later in the file (usage helper,
# getopt parsing, per-URL fetch/parse/validate loop), but it stops at a
# dangling `else:` with no body. It looks like a truncated duplicate of that
# block -- confirm whether it should exist at all before relying on it.
def usage(): print 'usage: htmldom.py [-d] [-b base_href] [-c charset_in] [-C codec_out] [url ...]' sys.exit(2) try: (opts, args) = getopt.getopt(sys.argv[1:], 'dc:C:b:') except getopt.GetoptError: usage() (charset_in, codec_out, base_href) = ('iso-8859-1', 'euc-jp', '') for (k, v) in opts: if k == '-d': debug += 1 elif k == '-c': charset_in = v elif k == '-C': codec_out = v elif k == '-b': base_href = v encoder = getencoder(codec_out) for url in args: agent = agent.Agent(debug=sys.stdout) (fp, content_type, charset) = agent.get(url) stylesheet = ActiveStyleSheet(agent, base_href=(base_href or url)) root = parse(fp, base_href=url, charset=(charset or charset_in), stylesheet=stylesheet) fp.close() stylesheet.dump() validate(root, root) if debug: for (i, e) in root.walk(): sys.stdout.write(' ' * i + repr(e) + '\n') else:
def __init__(self, charset):
    """Set up the instance with an encoder for `charset` and no output text.

    `self.output_text` starts as None; `self.encoder` holds the callable
    returned by `getencoder(charset)`.
    """
    # NOTE(review): `getencoder` comes from outside this block; presumably
    # `codecs.getencoder` -- confirm against the file's imports.
    self.encoder = getencoder(charset)
    self.output_text = None
def __init__(self, charset):
    """Look up the encoder for `charset` once and keep it on the instance.

    After construction, `self.encoder` is whatever `getencoder(charset)`
    returned.
    """
    # NOTE(review): `getencoder` is not defined in this block; presumably
    # `codecs.getencoder` -- verify against the file's imports.
    encoder_for_charset = getencoder(charset)
    self.encoder = encoder_for_charset
if __name__ == '__main__':
    # Command-line driver: fetch each URL, parse it into a DOM tree, dump the
    # stylesheet, validate the tree, then print either a debug walk of the
    # tree or the re-encoded document.
    import getopt
    import agent

    def usage():
        """Print the command-line synopsis and exit with status 2."""
        # Parenthesized so the line parses under both Python 2 and Python 3;
        # the output is unchanged.
        print('usage: htmldom.py [-d] [-b base_href] [-c charset_in] [-C codec_out] [url ...]')
        sys.exit(2)

    try:
        (opts, args) = getopt.getopt(sys.argv[1:], 'dc:C:b:')
    except getopt.GetoptError:
        usage()
    (charset_in, codec_out, base_href) = ('iso-8859-1', 'euc-jp', '')
    for (k, v) in opts:
        if k == '-d':
            # NOTE(review): `debug` is never assigned in this block;
            # presumably a module-level global defined earlier -- confirm.
            debug += 1
        elif k == '-c':
            charset_in = v
        elif k == '-C':
            codec_out = v
        elif k == '-b':
            base_href = v
    encoder = getencoder(codec_out)
    for url in args:
        # Keep the Agent instance under its own name. Rebinding `agent` here
        # would shadow the imported module, so on the second URL
        # `agent.Agent` would be looked up on the instance, not the module.
        fetcher = agent.Agent(debug=sys.stdout)
        (fp, content_type, charset) = fetcher.get(url)
        try:
            stylesheet = ActiveStyleSheet(fetcher, base_href=(base_href or url))
            root = parse(fp, base_href=url, charset=(charset or charset_in),
                         stylesheet=stylesheet)
        finally:
            # Release the fetched stream even when parsing raises.
            fp.close()
        stylesheet.dump()
        validate(root, root)
        if debug:
            # Debug mode: one repr per node, indented by the first element
            # of each pair yielded by root.walk().
            for (i, e) in root.walk():
                sys.stdout.write(' ' * i + repr(e) + '\n')
        else:
            # Normal mode: write each dumped chunk encoded with `codec_out`;
            # unencodable characters become XML character references.
            for c in root.dump():
                sys.stdout.write(encoder(c, 'xmlcharrefreplace')[0])