Beispiel #1
0
 def __init__(self, charset):
     """Look up the encoder for ``charset`` and keep it on the instance.

     NOTE(review): ``getencoder`` is not defined in this snippet; it is
     presumably ``codecs.getencoder`` -- confirm against the file's imports.
     """
     encode = getencoder(charset)
     self.encoder = encode
Beispiel #2
0
    def usage():
        """Print the command-line synopsis to stdout and exit with status 2."""
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('usage: htmldom.py [-d] [-b base_href] [-c charset_in] [-C codec_out] [url ...]')
        sys.exit(2)

    # Parse the command line; on a getopt error, usage() prints the synopsis
    # and exits with status 2.
    try:
        (opts, args) = getopt.getopt(sys.argv[1:], 'dc:C:b:')
    except getopt.GetoptError:
        usage()
    # Defaults for the input charset, the output codec and the base href
    # (an empty base href makes the stylesheet fall back to the URL below).
    (charset_in, codec_out, base_href) = ('iso-8859-1', 'euc-jp', '')
    for (k, v) in opts:
        # NOTE(review): `debug` is incremented without being initialised in
        # this snippet; it must already exist in an enclosing scope -- confirm.
        if k == '-d': debug += 1
        elif k == '-c': charset_in = v
        elif k == '-C': codec_out = v
        elif k == '-b': base_href = v
    encoder = getencoder(codec_out)
    # Process every URL given on the command line.
    for url in args:
        # NOTE(review): this rebinds `agent` to the new instance. If `agent`
        # is the imported module, as the `agent.Agent` call suggests, the
        # second URL would look up `.Agent` on the instance -- confirm and
        # consider using a separate local name.
        agent = agent.Agent(debug=sys.stdout)
        (fp, content_type, charset) = agent.get(url)
        stylesheet = ActiveStyleSheet(agent, base_href=(base_href or url))
        # The charset returned by agent.get() wins over the -c default.
        root = parse(fp,
                     base_href=url,
                     charset=(charset or charset_in),
                     stylesheet=stylesheet)
        fp.close()
        stylesheet.dump()
        validate(root, root)
        if debug:
            # Debug mode: one repr() per walked element, indented two spaces
            # per level value `i` yielded by root.walk().
            for (i, e) in root.walk():
                sys.stdout.write('  ' * i + repr(e) + '\n')
        # NOTE(review): the snippet is cut off here; the body of this else
        # branch is not visible.
        else:
Beispiel #3
0
 def __init__(self, charset):
   """Look up the encoder for ``charset`` and start with no output text.

   NOTE(review): ``getencoder`` is not defined in this snippet; it is
   presumably ``codecs.getencoder`` -- confirm against the file's imports.
   """
   encode = getencoder(charset)
   self.encoder = encode
   # No output text yet; the attribute starts out as None.
   self.output_text = None
Beispiel #4
0
 def __init__(self, charset):
   """Look up the encoder for ``charset`` and keep it on the instance.

   NOTE(review): ``getencoder`` is not defined in this snippet; it is
   presumably ``codecs.getencoder`` -- confirm against the file's imports.
   """
   encode = getencoder(charset)
   self.encoder = encode
Beispiel #5
0
if __name__ == '__main__':
  # Command-line driver: fetch each URL, parse it with its stylesheet,
  # validate the tree, then either print a debug walk or the encoded dump.
  import getopt
  import agent

  def usage():
    """Print the command-line synopsis to stdout and exit with status 2."""
    # A single parenthesised argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print('usage: htmldom.py [-d] [-b base_href] [-c charset_in] [-C codec_out] [url ...]')
    sys.exit(2)

  try:
    (opts, args) = getopt.getopt(sys.argv[1:], 'dc:C:b:')
  except getopt.GetoptError:
    usage()
  # Defaults for the input charset, the output codec and the base href
  # (an empty base href makes the stylesheet fall back to the URL itself).
  (charset_in, codec_out, base_href) = ('iso-8859-1', 'euc-jp', '')
  for (k, v) in opts:
    # NOTE(review): `debug` is incremented without being set in this block;
    # presumably a module-level flag defined earlier in the file -- confirm.
    if k == '-d': debug += 1
    elif k == '-c': charset_in = v
    elif k == '-C': codec_out = v
    elif k == '-b': base_href = v
  encoder = getencoder(codec_out)
  for url in args:
    # Keep the instance under its own name. Rebinding `agent` here would hide
    # the imported module, so `agent.Agent` would be looked up on the
    # instance when the second URL is processed.
    fetcher = agent.Agent(debug=sys.stdout)
    (fp, content_type, charset) = fetcher.get(url)
    try:
      stylesheet = ActiveStyleSheet(fetcher, base_href=(base_href or url))
      # The charset returned by the fetch wins over the -c default.
      root = parse(fp, base_href=url, charset=(charset or charset_in), stylesheet=stylesheet)
    finally:
      # Close the response even when building the stylesheet or parsing fails.
      fp.close()
    stylesheet.dump()
    validate(root, root)
    if debug:
      # Debug mode: one repr() per walked element, indented two spaces per
      # level value `i` yielded by root.walk().
      for (i, e) in root.walk():
        sys.stdout.write('  ' * i + repr(e) + '\n')
    else:
      # Normal mode: encode each dumped chunk with the output codec;
      # unencodable characters become XML character references.
      for c in root.dump():
        sys.stdout.write(encoder(c, 'xmlcharrefreplace')[0])