def __init__(self):
    """Set up the underlying HTML parser and start with an empty URL list.

    When the module-level ``badhtml`` flag is truthy, the htmllib-style
    constructor is used, which requires a formatter instance; otherwise
    the plain no-argument HTMLParser constructor runs.
    """
    # Build the base-constructor argument list from the badhtml flag,
    # then make a single dispatch call instead of branching twice.
    extra_args = (NullFormatter(),) if badhtml else ()
    HTMLParser.__init__(self, *extra_args)
    self.urls = []
def __init__(self, attr=None, tags=None):
    """Collect values of one attribute from a fixed set of tags.

    Defaults to the ``href`` attribute of ``<a>`` tags when the caller
    leaves either argument as None.
    """
    self.__attr = 'href' if attr is None else attr
    self.__tags = ('a', ) if tags is None else tags
    # Delegate actual parsing to a formatter-driven htmllib parser.
    self.__P = HTMLParser(NullFormatter())
    self.__saved = []
    self.__tagHandlers = {}
def compactor(dev_filename, rel_filename):
    """Generate the release version of an HTML file.

    Reads *dev_filename*, validates the markup by running it through
    HTMLParser, then writes a minified copy to *rel_filename*.  On a
    parse error the message is reported via error() and the process
    exits with status 1.
    """
    # Use compactor to generate release version.
    echo('Compacting: %s -> %s' % (dev_filename, rel_filename))
    # Context manager guarantees the source handle is closed
    # (the original leaked the open file object).
    with open(dev_filename, 'r') as source_file:
        source_data = source_file.read()
    try:
        # Verify that the html file is correct before minifying.
        htmlparser = HTMLParser(NullFormatter())
        htmlparser.feed(source_data)
        htmlparser.close()
        # Now minify; the with-block closes the output file even if the
        # minifier raises mid-way (the original leaked it on error).
        with open(rel_filename, 'wb') as output_file:
            compactor = HTMLMinifier(output_file.write, True)
            compactor.feed(source_data)
            compactor.close()
    except HTMLParseError as e:
        error(str(e))
        exit(1)
#coding:utf-8 import urllib2 from htmllib import HTMLParser from formatter import NullFormatter import os import re url_name = "http://b.hatena.ne.jp/hotentry" html_data = urllib2.urlopen(url_name) parser = HTMLParser(NullFormatter()) try: parser.feed(html_data.read()) except TypeError: print "type error" pat = re.compile("^http.*") for link in parser.anchorlist: x = pat.search(link) if x is not None: print x.group(0)
def __init__(self):
    """Initialize the base HTML parser with a discard-all formatter and
    reset the server-related state via clear_serv()."""
    fmt = NullFormatter()
    HTMLParser.__init__(self, fmt)
    self.clear_serv()
def __init__(self):
    """Construct the parser on top of htmllib's HTMLParser, discarding
    all formatting output via NullFormatter."""
    formatter = NullFormatter()
    HTMLParser.__init__(self, formatter)
def __init__(self):
    """Prepare the image-link scraper.

    Sets up the base parser, compiles a regex that captures the file
    name of URLs ending in a known image extension (_IMAGE_EXT), and
    starts with an empty link list.
    """
    HTMLParser.__init__(self, NullFormatter())
    # Assemble the alternation of extensions first, then the full pattern.
    exts = "|".join(_IMAGE_EXT)
    pattern = r'.+/(?P<name>.+\.(?:' + exts + ')).*$'
    self.reg = re.compile(pattern)
    self.links = []
def __init__(self):
    """Initialize the underlying NullFormatter and start collecting
    results into an empty list."""
    # Base-class setup must run first so formatter state exists.
    NullFormatter.__init__(self)
    self.result = []