def __init__(self, doc=sys.stdout):
    """Initialise the base ``NullWriter`` and this writer's own state.

    Args:
        doc: Object stored as ``self.doc``. Presumably the output target
            for the writer -- confirm against the methods that use it.

    NOTE: the ``sys.stdout`` default is evaluated once, when this method
    is defined, so rebinding ``sys.stdout`` later does not change it.
    """
    NullWriter.__init__(self)
    # Plain, mutually independent attribute defaults.
    self.features = []
    self.footnote_marker_format = '[%d]'
    self.list_depth = 0
    self.doc = doc
def test():
    """Feed an HTML file through ``HTMLParser`` as a smoke test.

    The file defaults to ``test.html``; the first command-line argument,
    when present, overrides it. The parser is driven by an
    ``AbstractFormatter`` wrapping a ``NullWriter``.
    """
    import sys

    path = sys.argv[1] if sys.argv[1:] else 'test.html'

    # The context manager closes the file even if read() raises, which the
    # manual open()/close() pairing did not guarantee.
    with open(path, 'r') as fp:
        data = fp.read()

    from formatter import NullWriter, AbstractFormatter

    parser = HTMLParser(AbstractFormatter(NullWriter()))
    parser.feed(data)
    parser.close()
def __init__(self, maxcol=72):
    """Store the column limit, initialise the base writer, then reset.

    Args:
        maxcol: Value stored as ``self.maxcol`` (default 72).
    """
    # Both attributes are in place before the base initialiser and
    # reset() run.
    self.text, self.maxcol = '', maxcol
    NullWriter.__init__(self)
    self.reset()
def __init__(self):
    """Initialise the ``NullWriter`` base, then call ``saved_clear()``."""
    # The base initialiser runs first; saved_clear() is defined outside
    # this view, so its effect is not documented here.
    NullWriter.__init__(self)
    self.saved_clear()
def __init__(self):
    """Initialise the parser with a ``NullWriter``-backed formatter.

    Sets ``self.result`` to an empty list and ``self.requires_no_close``
    to the tags that take no closing tag (``img`` and ``br``).
    """
    null_formatter = AbstractFormatter(NullWriter())
    HTMLParser.__init__(self, null_formatter)
    self.requires_no_close = ['img', 'br']
    self.result = []
def __init__(self):
    """Initialise the ``NullWriter`` base and an empty ``_bodyText`` list."""
    NullWriter.__init__(self)
    self._bodyText = list()
def __init__(self):
    """Initialise the formatter over a ``NullWriter`` and set defaults.

    Sets ``m_raw`` to an empty list, ``page_width`` to 60 and ``cursor``
    to 0.
    """
    discard_writer = NullWriter()
    AbstractFormatter.__init__(self, discard_writer)
    self.cursor = 0
    self.page_width = 60
    self.m_raw = []
def __init__(self, require_link_target=False):
    """Initialise the parser and its whitelist configuration.

    Args:
        require_link_target: Boolean indicating whether links need to
            have a target attribute. Stored as
            ``self.require_link_target``.

    Sets up ``result`` and ``open_tags`` as empty lists, plus the
    whitelists ``permitted_tags``, ``requires_no_close``,
    ``allowed_attributes`` and ``allowed_schemes``.
    """
    HTMLParser.__init__(self, AbstractFormatter(NullWriter()))
    self.result = []
    self.open_tags = []

    # A list of the only tags allowed. Be careful adding to this. Adding
    # "script," for example, would not be smart.
    # NOTE: 'img' IS permitted by this list; be aware of the danger of IMG
    # embedded commands and/or web bugs, and remove it if that matters.
    # Each tag appears exactly once so that permitted_tags.remove(tag)
    # really withdraws the tag.
    self.permitted_tags = [
        'a', 'b', 'br', 'em', 'i', 'li', 'ol', 'ul', 'p', 'strong', 'u',
        'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'q',
        'cite', 'code', 'samp', 'kbd', 'var', 'dfn', 'address', 'big',
        'small', 'ins', 'del', 'acronym', 'abbr', 'strike', 's', 'sub',
        'sup', 'tt', 'pre', 'center', 'font', 'basefont', 'multicol',
        'spacer', 'layer', 'ilayer', 'nolayer', 'img', 'map', 'area',
        'param', 'hr', 'nobr', 'wbr', 'dl', 'dt', 'dd', 'menu', 'dir',
        'form', 'input', 'button', 'label', 'select', 'option',
        'optgroup', 'textarea', 'fieldset', 'legend', 'table', 'tr',
        'td', 'th', 'tbody', 'tfoot', 'thead', 'caption', 'col',
        'colgroup', 'span',
    ]

    # A list of tags that require no closing tag.
    self.requires_no_close = ['img', 'br']

    # A dictionary showing the only attributes allowed for particular tags.
    # If a tag is not listed here, it is allowed no attributes. Adding
    # "on" tags, like "onhover," would not be smart. Also be very careful
    # of "background" and "style."
    self.allowed_attributes = {
        'a': ['href', 'target', 'rel'],
        'p': ['align'],
        'img': ['src', 'alt', 'border', 'title', 'class'],
        'table': ['cellpadding', 'cellspacing', 'border', 'width',
                  'height'],
        'font': ['size', 'face', 'color', 'style', 'class'],
        'span': ['style'],
        'h3': ['style'],
        'td': ['rowspan', 'colspan', 'width', 'height'],
        'th': ['rowspan', 'colspan', 'width', 'height'],
    }

    # The only schemes allowed in URLs (for href and src attributes).
    # Adding "javascript" or "vbscript" to this list would not be smart.
    self.allowed_schemes = ['http', 'https', 'ftp', 'irc', 'mailto', '']

    # Boolean indicating whether links need to have a target attribute.
    self.require_link_target = require_link_target
def __init__(self, allow_refs=False):
    """Initialise the parser with a ``NullWriter``-backed formatter.

    Args:
        allow_refs: Flag stored as ``self.allow_refs`` (default False).

    Also sets ``self.result`` to an empty list.
    """
    null_formatter = AbstractFormatter(NullWriter())
    HTMLParser.__init__(self, null_formatter)
    self.allow_refs = allow_refs
    self.result = []