def __init__(self, fmt=AbstractFormatter):
    """Initialise parser state and the tag/attribute/URL-scheme policy.

    ``fmt`` is passed unchanged to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, fmt)
    self.result = ""
    self.open_tags = []

    # Tags that are forbidden outright.
    self.forbidden_tags = ["script", "embed", "iframe", "frame"]

    # Tags that need no closing tag.
    self.requires_no_close = ["img", "br"]

    # Whitelist of attributes per tag.  A tag that is absent from this
    # mapping is allowed no attributes at all.  Never add event-handler
    # ("on...") attributes such as "onhover", and be very careful with
    # "background" and "style".
    attrs = {
        "a": ["href", "title", "target", "style"],
        "img": ["src", "alt", "border", "style"],
        "blockquote": ["type", "style"],
        "font": ["size", "face"],
    }
    for heading in ("h5", "h4", "h3", "h2", "h1"):
        attrs[heading] = ["style"]
    # Every layout tag gets its own fresh list object.
    for layout_tag in ("table", "tbody", "tr", "td", "div", "span"):
        attrs[layout_tag] = ["border", "width", "height", "style", "align", "bgcolor"]
    self.allowed_attributes = attrs

    # URL schemes accepted in href/src attributes.  Adding "javascript" or
    # "vbscript" here would be unsafe.
    self.allowed_schemes = ["http", "https", "ftp"]
def __init__(self, fmt=AbstractFormatter):
    """Initialise parser state and the whitelist-based filtering policy.

    ``fmt`` is passed unchanged to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, fmt)
    self.result = ""
    self.open_tags = []

    # The only tags allowed.  Be careful when extending this: adding
    # "script", for example, would be unsafe.  'img' is left out by default
    # because of the danger of IMG embedded commands and/or web bugs.
    self.permitted_tags = "a b blockquote br i li ol ul p cite".split()

    # Tags that need no closing tag.
    self.requires_no_close = "img br".split()

    # Whitelist of attributes per tag.  A tag that is absent from this
    # mapping is allowed no attributes at all.  Never add event-handler
    # ("on...") attributes such as "onhover", and be very careful with
    # "background" and "style".
    self.allowed_attributes = dict(
        a=['href', 'title'],
        img=['src', 'alt'],
        blockquote=['type'],
    )

    # URL schemes accepted in href/src attributes.  Adding "javascript" or
    # "vbscript" here would be unsafe.
    self.allowed_schemes = "http https ftp".split()
def __init__(self, fmt=AbstractFormatter):
    """Set up the output buffer, the open-tag stack and the filter policy.

    ``fmt`` is forwarded as-is to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, fmt)
    self.result = ""
    self.open_tags = []

    # Whitelisted tags.  Extend with care -- "script", for instance, must
    # never be added.  'img' is excluded by default because of the danger
    # of IMG embedded commands and/or web bugs.
    self.permitted_tags = [
        'a',
        'b',
        'blockquote',
        'br',
        'i',
        'li',
        'ol',
        'ul',
        'p',
        'cite',
    ]

    # Tags that need no closing tag.
    self.requires_no_close = ['img', 'br']

    # Attributes allowed per tag; a tag without an entry is allowed no
    # attributes.  Do not add "on..." handlers such as "onhover", and be
    # very careful with "background" and "style".
    attribute_policy = {}
    attribute_policy['a'] = ['href', 'title']
    attribute_policy['img'] = ['src', 'alt']
    attribute_policy['blockquote'] = ['type']
    self.allowed_attributes = attribute_policy

    # Schemes allowed in URLs (href and src attributes).  "javascript" and
    # "vbscript" must stay off this list.
    self.allowed_schemes = ['http', 'https', 'ftp']
def __init__(self, fmt=AbstractFormatter):
    """Initialise parser state and the tag/attribute/URL-scheme policy.

    ``fmt`` is passed unchanged to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, fmt)
    self.result = ""
    self.open_tags = []

    # Tags that are forbidden outright.
    self.forbidden_tags = ['script', 'embed', 'iframe', 'frame']

    # Tags that need no closing tag.
    self.requires_no_close = ['img', 'br']

    # Whitelist of attributes per tag.  A tag that is absent from this
    # mapping is allowed no attributes at all.  Never add event-handler
    # ("on...") attributes such as "onhover", and be very careful with
    # "background" and "style".
    attrs = {
        'a': ['href', 'title', 'target', 'style'],
        'img': ['src', 'alt', 'border', 'style'],
        'blockquote': ['type', 'style'],
    }
    # Every layout tag gets its own fresh list object.
    for layout_tag in ('table', 'tbody', 'tr', 'td', 'div', 'span'):
        attrs[layout_tag] = ['border', 'width', 'height', 'style', 'align', 'bgcolor']
    self.allowed_attributes = attrs

    # URL schemes accepted in href/src attributes.  Adding "javascript" or
    # "vbscript" here would be unsafe.
    self.allowed_schemes = ['http', 'https', 'ftp']
def __init__(
    self,
    permitted_tags=None,
    allowed_attributes=None,
    fmt=AbstractFormatter,
    strip_disallowed=False
):
    """Initialise parser state and the whitelist-based filtering policy.

    Parameters:
        permitted_tags: iterable of allowed tag names.  A name ending in
            '/' (e.g. 'br/') is recorded, without the slash, both as a
            permitted tag and as a tag that needs no closing tag.  ``None``
            (the default) selects the built-in list below.
        allowed_attributes: mapping of tag name to the list of attribute
            names allowed on that tag.  ``None`` (the default) selects the
            built-in mapping below.
        fmt: passed unchanged to ``HTMLParser.__init__``.
        strip_disallowed: stored on the instance; chooses between stripping
            and escaping disallowed tags (the code that consumes the flag
            is outside this method).

    The defaults are created per call rather than in the signature, so
    instances never share (and cannot corrupt) one default list or dict.
    """
    if permitted_tags is None:
        permitted_tags = [
            'a', 'b', 'blockquote', 'br/', 'i', 'li', 'ol', 'ul', 'p',
            'cite', 'code', 'pre', 'img/',
        ]
    if allowed_attributes is None:
        allowed_attributes = {
            'a': ['href', 'title'],
            'img': ['src', 'alt'],
            'blockquote': ['type'],
        }

    HTMLParser.__init__(self, fmt)
    self.result = ''
    self.open_tags = []

    # endswith() is used instead of indexing [-1] so that an empty tag name
    # does not raise IndexError.
    self.permitted_tags = [tag for tag in permitted_tags if not tag.endswith('/')]
    self.requires_no_close = [tag[:-1] for tag in permitted_tags if tag.endswith('/')]
    self.permitted_tags += self.requires_no_close
    self.allowed_attributes = allowed_attributes

    # The only schemes allowed in URLs (for href and src attributes).
    # Adding "javascript" or "vbscript" to this list would be unsafe.
    self.allowed_schemes = ['http', 'https', 'ftp']

    # Whether disallowed tags are stripped or escaped.
    self.strip_disallowed = strip_disallowed
    self.in_disallowed = False
def __init__(self, funFormatter, objHere):
    """Reset the objH1..objH3 slots, load the help DOD, then init the base.

    ``objHere`` is handed to ``GetDOD`` together with the key "E3Help";
    ``funFormatter`` is forwarded to ``HTMLParser.__init__``.
    """
    # All three objH* attributes start out as None.
    self.objH1 = self.objH2 = self.objH3 = None
    self.dodHelp = GetDOD(objHere, "E3Help")
    # The base initialiser runs last, after the attributes above exist.
    HTMLParser.__init__(self, funFormatter)
def __init__(
    self,
    permitted_tags=None,
    allowed_attributes=None,
    fmt=AbstractFormatter
):
    """Initialise parser state and the whitelist-based filtering policy.

    Parameters:
        permitted_tags: iterable of allowed tag names.  A name ending in
            '/' (e.g. 'br/') is recorded, without the slash, both as a
            permitted tag and as a tag that needs no closing tag.  ``None``
            (the default) selects the built-in list below.
        allowed_attributes: mapping of tag name to the list of attribute
            names allowed on that tag.  ``None`` (the default) selects the
            built-in mapping below.
        fmt: passed unchanged to ``HTMLParser.__init__``.

    The defaults are created per call rather than in the signature, so
    instances never share (and cannot corrupt) one default list or dict.
    """
    if permitted_tags is None:
        permitted_tags = [
            'a', 'b', 'blockquote', 'br/', 'i', 'li', 'ol', 'ul', 'p',
            'cite', 'code', 'pre', 'img/',
        ]
    if allowed_attributes is None:
        allowed_attributes = {
            'a': ['href', 'title'],
            'img': ['src', 'alt'],
            'blockquote': ['type'],
        }

    HTMLParser.__init__(self, fmt)
    self.result = ''
    self.open_tags = []

    # endswith() is used instead of indexing [-1] so that an empty tag name
    # does not raise IndexError.
    self.permitted_tags = [tag for tag in permitted_tags if not tag.endswith('/')]
    self.requires_no_close = [tag[:-1] for tag in permitted_tags if tag.endswith('/')]
    self.permitted_tags += self.requires_no_close
    self.allowed_attributes = allowed_attributes

    # The only schemes allowed in URLs (for href and src attributes).
    # Adding "javascript" or "vbscript" to this list would be unsafe.
    self.allowed_schemes = ['http', 'https', 'ftp']
def __init__(self, formatter, path, output):
    """Initialise the base parser and remember the path and output file.

    ``formatter`` is forwarded to ``HTMLParser.__init__``; ``path`` is the
    relative path and ``output`` the output file.
    """
    HTMLParser.__init__(self, formatter)

    # Relative path, and the output file.
    self.path = path
    self.ft = output

    # Number of tabs used when pretty-printing files.
    self.indent = 0

    # True while actively processing, False otherwise (headers, footers,
    # etc.).
    self.proc = False

    # Stack of hrefs pushed when anchors begin.
    # XXX This shouldn't need to be a stack -- anchors shouldn't nest.
    # XXX See SF bug <http://www.python.org/sf/546579>.
    self.hrefstack = []
def __init__(self, formatter=None):
    """Initialise the base parser and reset all scraping state flags.

    Parameters:
        formatter: formatter forwarded to ``HTMLParser.__init__``.  When
            omitted (``None``), a fresh ``AbstractFormatter(DumbWriter())``
            is created for this instance.  The previous signature built
            that default once, at definition time, so every parser created
            without an argument shared the same formatter and writer
            objects.
    """
    if formatter is None:
        formatter = AbstractFormatter(DumbWriter())
    HTMLParser.__init__(self, formatter)

    self.intoTheBox = False      # entering/leaving the box environment
    self.intoTheTitle = None     # entering/leaving the title row
    self.intoTheNews = None      # entering/leaving the news text
    self.effectiveTitle = False  # <span> tag of the title
    self.effectiveDate = False   # <span> tag of the date
    self.effectiveAuth = False   # <div> tag of the author
    self.writable = False        # text may be written
    self.cookedText = ""         # resulting XML text
def __init__(self, formatter=None):
    """Initialise the base parser and reset all scraping state.

    Parameters:
        formatter: formatter forwarded to ``HTMLParser.__init__``.  When
            omitted (``None``), a fresh ``AbstractFormatter(DumbWriter())``
            is created for this instance.  The previous signature built
            that default once, at definition time, so every parser created
            without an argument shared the same formatter and writer
            objects.
    """
    if formatter is None:
        formatter = AbstractFormatter(DumbWriter())
    HTMLParser.__init__(self, formatter)

    self.intoTheAnchor = False  # entering/leaving anchor tags
    self.intoTheDate = False    # entering/leaving the date box
    self.intoTheTitle = False   # entering/leaving the title box
    self.intoTheNews = False    # entering/leaving the news box
    self.writable = False       # text may be written
    self.maxNews = 10           # total number of news items displayed
    self.cookedText = ""        # resulting XML text
    self.gotTitle = False       # whether a title is actually present
    self.pseudoTitle = ""       # fake title (used if one is missing)
def __init__(self, fmt=AbstractFormatter):
    """Initialise parser state and the whitelist-based filtering policy.

    ``fmt`` is passed unchanged to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, fmt)
    self.result = ""
    self.open_tags = []

    # The only tags allowed.  Be careful when extending this: adding
    # "script", for example, would be unsafe.  Note that this list does
    # include "img" and "style".
    self.permitted_tags = (
        ["a", "b", "blockquote", "br", "i", "sup", "sub", "strike", "hr", "u"]
        + ["li", "ol", "ul", "p", "cite", "img", "style", "font"]
        + ["h1", "h2", "h3", "h4", "h5", "h6"]
        + ["pre", "div"]
    )

    # Tags that need no closing tag.
    self.requires_no_close = ["img", "br"]

    # Whitelist of attributes per tag.  A tag that is absent from this
    # mapping is allowed no attributes at all.  Never add event-handler
    # ("on...") attributes such as "onhover", and be very careful with
    # "background" and "style".
    self.allowed_attributes = dict(
        a=["href", "title"],
        img=["src", "alt"],
        blockquote=["type"],
    )

    # URL schemes accepted in href/src attributes.  Adding "javascript" or
    # "vbscript" here would be unsafe.
    self.allowed_schemes = ["http", "https", "ftp"]
def __init__(self, fmt=AbstractFormatter):
    """Initialise parser state and the tag/attribute/URL-scheme policy.

    ``fmt`` is passed unchanged to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, fmt)
    self.result = ""
    self.open_tags = []

    # Tags that are forbidden outright.
    self.forbidden_tags = ['script', 'embed', 'iframe', 'frame']

    # Tags that need no closing tag.
    self.requires_no_close = ['img', 'br']

    # Whitelist of attributes per tag.  A tag that is absent from this
    # mapping is allowed no attributes at all.  Never add event-handler
    # ("on...") attributes such as "onhover", and be very careful with
    # "background" and "style".
    #
    # Sample markup carried over from the original comment (presumably the
    # kind of input this policy was tuned for -- confirm):
    # <h5 style="text-align: center;"><b><i><u><font size="5" face="impact">THIS IS A TEST</font></u></i></b></h5>
    # <blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><p style="text-align: center;">
    # <font size="5" face="arial" color="#cc3333">of the EBS</font></p><p style="text-align: center;">
    # <font size="5" face="arial"><br></font></p><p style="text-align: center;"><font size="5" face="arial">
    # <sup>reddit</sup><sub>2</sub></font></p>
    # <p style="text-align: center;"><font size="5" face="arial"><sub><br></sub></font></p>
    # <p style="text-align: center;"><font size="5" face="arial">fiiiiiii<sub>4</sub></font></p>
    # <p style="text-align: center;"><font size="5" face="arial"><sub><br></sub></font></p>
    # <p style="text-align: center;"><hr><br></p><p style="text-align: center;">
    # <strike>strike</strike></p></blockquote>
    attrs = {
        'a': ['href', 'title', 'target', 'style'],
        'p': ['style'],
        'img': ['src', 'alt', 'border', 'style', 'align'],
        'blockquote': ['type', 'style', 'align'],
        'font': ['size', 'face', 'align'],
    }
    for heading in ('h5', 'h4', 'h3', 'h2', 'h1'):
        attrs[heading] = ['style']
    # Every layout tag gets its own fresh list object.
    for layout_tag in ('table', 'tbody', 'tr', 'td', 'div', 'span'):
        attrs[layout_tag] = ['border', 'width', 'height', 'style', 'align', 'bgcolor']
    self.allowed_attributes = attrs

    # URL schemes accepted in href/src attributes.  Adding "javascript" or
    # "vbscript" here would be unsafe.
    self.allowed_schemes = ['http', 'https', 'ftp']
def __init__(self, fmt=AbstractFormatter):
    """Set up the output buffer, the open-tag stack and the filter policy.

    ``fmt`` is forwarded as-is to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, fmt)
    self.result = ""
    self.open_tags = []

    # Deny list of tags.
    self.forbidden_tags = ['script', 'embed', 'iframe', 'frame']

    # Tags that need no closing tag.
    self.requires_no_close = ['img', 'br']

    # Attributes allowed per tag; a tag without an entry is allowed no
    # attributes.  Do not add "on..." handlers such as "onhover", and be
    # very careful with "background" and "style".
    #
    # Sample markup carried over from the original comment (presumably the
    # kind of input this policy was tuned for -- confirm):
    # <h5 style="text-align: center;"><b><i><u><font size="5" face="impact">THIS IS A TEST</font></u></i></b></h5>
    # <blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><p style="text-align: center;">
    # <font size="5" face="arial" color="#cc3333">of the EBS</font></p><p style="text-align: center;">
    # <font size="5" face="arial"><br></font></p><p style="text-align: center;"><font size="5" face="arial">
    # <sup>reddit</sup><sub>2</sub></font></p>
    # <p style="text-align: center;"><font size="5" face="arial"><sub><br></sub></font></p>
    # <p style="text-align: center;"><font size="5" face="arial">fiiiiiii<sub>4</sub></font></p>
    # <p style="text-align: center;"><font size="5" face="arial"><sub><br></sub></font></p>
    # <p style="text-align: center;"><hr><br></p><p style="text-align: center;">
    # <strike>strike</strike></p></blockquote>
    policy = dict(
        a=['href', 'title', 'target', 'style'],
        p=['style'],
        img=['src', 'alt', 'border', 'style', 'align'],
        blockquote=['type', 'style', 'align'],
        font=['size', 'face', 'align'],
    )
    for heading in ('h5', 'h4', 'h3', 'h2', 'h1'):
        policy[heading] = ['style']
    # A new list is built on every iteration, so no two tags share one.
    for container in ('table', 'tbody', 'tr', 'td', 'div', 'span'):
        policy[container] = ['border', 'width', 'height', 'style', 'align', 'bgcolor']
    self.allowed_attributes = policy

    # Schemes allowed in URLs (href and src attributes).  "javascript" and
    # "vbscript" must stay off this list.
    self.allowed_schemes = ['http', 'https', 'ftp']
def __init__(self, fmt=AbstractFormatter):
    """Initialise parser state, the at_users list and the filter policy.

    ``fmt`` is passed unchanged to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, fmt)
    self.result = ""
    self.open_tags = []
    self.at_users = []

    # Tags that are let through.
    self.permitted_tags = (
        ['a', 'b', 'blockquote', 'br', 'i']
        + ['li', 'ol', 'ul', 'p', 'cite']
        + ['span', 'font', 'strike', 'div']
    )

    # Tags that need no closing tag.
    self.requires_no_close = ['img', 'br']

    # Attributes allowed on each tag.
    self.allowed_attributes = dict(
        a=['href', 'title', 'style'],
        img=['src', 'width', 'height'],
        blockquote=['type'],
        span=['style'],
        font=['color'],
        div=['style'],
    )

    # URL schemes that are accepted.
    self.allowed_schemes = ['http', 'https', 'ftp']
def __init__(self):
    """Initialise the base parser, the image-name regex and the link list.

    The compiled pattern captures, as group ``name``, text that follows a
    '/' and ends in a dot plus one of the ``_IMAGE_EXT`` alternatives.
    """
    HTMLParser.__init__(self, NullFormatter())
    # The entries of _IMAGE_EXT are joined verbatim into one alternation.
    extension_alternatives = "|".join(_IMAGE_EXT)
    pattern = r".+/(?P<name>.+\.(?:" + extension_alternatives + ")).*$"
    self.reg = re.compile(pattern)
    self.links = []
def __init__(self, formatter):
    """Initialise the base parser and the empty selection state.

    ``formatter`` is forwarded to ``HTMLParser.__init__``.
    """
    # Base class constructor.
    HTMLParser.__init__(self, formatter)

    # Empty lists for the BasinID values and for the recorded years.
    self.selectBasins = []
    self.selectYears = []

    # Both "inside ... select" flags start out False.
    self.insideBasinIdSelect = self.insideYearSelect = False
def __init__(self):
    """Initialise the base parser with a null-writer formatter.

    Also creates the empty ``result`` list and the list of tags that need
    no closing tag.
    """
    discarding_formatter = AbstractFormatter(NullWriter())
    HTMLParser.__init__(self, discarding_formatter)
    self.result = []
    # Tags that need no closing tag.
    self.requires_no_close = ['img', 'br']
def __init__(self):
    """Initialise the base parser and reset the script counter and items.

    An empty string is passed as the single argument of
    ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, '')
    self.images_items = ''
    self.script_tag_count = 0
def __init__(self):
    """Initialise the base parser with a freshly created NullFormatter."""
    null_formatter = NullFormatter()
    HTMLParser.__init__(self, null_formatter)
def __init__(self):
    """Set up the base parser, the image-name pattern and the link list.

    The compiled pattern captures, as group ``name``, text that follows a
    '/' and ends in a dot plus one of the ``_IMAGE_EXT`` alternatives.
    """
    HTMLParser.__init__(self, NullFormatter())
    # Pattern pieces: the fixed prefix, the joined extensions, the suffix.
    pieces = (r'.+/(?P<name>.+\.(?:', "|".join(_IMAGE_EXT), ')).*$')
    self.reg = re.compile("".join(pieces))
    self.links = []
def __init__(self, given_formatter):
    """Initialise the base parser and start with an empty ``urls`` list.

    ``given_formatter`` is forwarded to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, given_formatter)
    # Starts empty.
    self.urls = list()
def __init__(self):
    """Create the private link/markup state and initialise the base parser.

    The base parser is initialised with a ``formatter.NullFormatter`` and a
    ``lib.PyURLOpener`` instance is created and kept.
    """
    self.__links = []
    self.__markup = ''
    # NOTE(review): __parser receives the return value of the __init__
    # call, which is presumably always None -- confirm whether anything
    # reads this attribute.
    null_formatter = formatter.NullFormatter()
    self.__parser = HTMLParser.__init__(self, null_formatter)
    self.__pyurlopener = lib.PyURLOpener()
def __init__(self, formatter, verbose=0):
    """Initialise the base parser and the empty table-tracking state.

    ``formatter`` and ``verbose`` are forwarded to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, formatter, verbose)
    # Two distinct empty lists: the table stack and the tables found.
    self._tableStack, self._tablesFound = [], []
    # No current table or cell yet.
    self._currentTable = self._currentCell = None
def __init__(self, wait_parse, url):
    """Initialise the base parser and store ``wait_parse`` and ``url``."""
    HTMLParser.__init__(self)
    self.url = url
    self.wait_parse = wait_parse
def __init__(self, require_link_target=False):
    """Initialise parser state and the whitelist-based filtering policy.

    The base parser is given an ``AbstractFormatter`` wrapping a
    ``NullWriter``.  ``require_link_target`` is stored on the instance; it
    indicates whether links need to have a target attribute.
    """
    HTMLParser.__init__(self, AbstractFormatter(NullWriter()))
    self.result = []
    self.open_tags = []

    # The only tags allowed.  Be careful when extending this: adding
    # "script", for example, would be unsafe.  Note that this list does
    # include 'img', and that 'ul', 'ol' and 'li' appear in it twice.
    self.permitted_tags = (
        "a b br em i li ol ul p strong u div "
        "h1 h2 h3 h4 h5 h6 "
        "blockquote q cite code samp kbd var dfn address big small "
        "ins del acronym abbr strike s sub sup tt pre center "
        "font basefont multicol spacer layer ilayer nolayer "
        "img map area param hr nobr wbr "
        "ul ol li dl dt dd menu dir "
        "form input button label select option optgroup textarea "
        "fieldset legend "
        "table tr td th tbody tfoot thead caption col colgroup span"
    ).split()

    # Tags that need no closing tag.
    self.requires_no_close = ['img', 'br']

    # Whitelist of attributes per tag.  A tag that is absent from this
    # mapping is allowed no attributes at all.  Never add event-handler
    # ("on...") attributes such as "onhover", and be very careful with
    # "background" and "style".
    attrs = {
        'a': ['href', 'target', 'rel'],
        'p': ['align'],
        'img': ['src', 'alt', 'border', 'title', "class"],
        'table': ['cellpadding', 'cellspacing', 'border', 'width', 'height'],
        'font': ['size', 'face', 'color', 'style', 'class'],
        'span': ['style'],
        'h3': ['style'],
    }
    # Each cell tag gets its own fresh list object.
    for cell_tag in ('td', 'th'):
        attrs[cell_tag] = ['rowspan', 'colspan', 'width', 'height']
    self.allowed_attributes = attrs

    # URL schemes accepted in href/src attributes (the empty string is on
    # the list too).  Adding "javascript" or "vbscript" would be unsafe.
    self.allowed_schemes = ['http', 'https', 'ftp', 'irc', 'mailto', '']

    # Whether links need to have a target attribute.
    self.require_link_target = require_link_target
def __init__(self, fmt, base):
    """Initialise the base parser with ``fmt`` and remember ``base``."""
    HTMLParser.__init__(self, fmt)
    # Stored unchanged for later use.
    self.base = base
def __init__(self):
    """Initialise the base parser and reset the vote counter and items.

    An empty string is passed as the single argument of
    ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, '')
    self.votes_items = []
    self.vote_tag_count = 0
def __init__(self):
    """Initialise the base parser with a NullFormatter, then clear_serv()."""
    null_formatter = NullFormatter()
    HTMLParser.__init__(self, null_formatter)
    # clear_serv() is defined elsewhere on the class.
    self.clear_serv()
def __init__(self, formatter, verbose=0):
    """Set up the base parser and start with no tables recorded.

    ``formatter`` and ``verbose`` are forwarded to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, formatter, verbose)

    # Stack of tables, initially empty.
    self._tableStack = list()
    # Neither a current table nor a current cell exists yet.
    self._currentTable = None
    self._currentCell = None
    # List of tables found, initially empty.
    self._tablesFound = list()
def __init__(self):
    """Initialise the base parser and reset the script counter and title.

    An empty string is passed as the single argument of
    ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, '')
    self.title = ''
    self.script_tag_count = 0
def __init__(self, formatter, verbose=0):
    """Initialise the base parser and start with an empty ``imagelist``.

    ``formatter`` and ``verbose`` are forwarded to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, formatter, verbose)
    # Starts empty.
    self.imagelist = list()
def __init__(self, allow_refs=False):
    """Initialise the base parser with a null-writer formatter.

    Creates the empty ``result`` list and stores ``allow_refs`` unchanged.
    """
    discarding_formatter = AbstractFormatter(NullWriter())
    HTMLParser.__init__(self, discarding_formatter)
    self.result = []
    self.allow_refs = allow_refs
def __init__(self, name, data):
    """Parse ``data`` and write the formatted output to the file ``name``.

    The file is opened for writing and wrapped in a ``DumbWriter`` (second
    argument 100) behind an ``AbstractFormatter``.  The parser is then fed
    ``data`` and closed, all inside the constructor.

    Parameters:
        name: path of the output file; it is opened with mode 'w'.
        data: markup passed to ``self.feed``.

    The output file is now always closed before the constructor returns,
    including when parsing raises.  Previously the handle was never closed
    explicitly.  As a consequence the instance's formatter writes to a
    closed file afterwards, so the object should not be fed more data.
    """
    out = open(name, 'w')
    try:
        HTMLParser.__init__(self, AbstractFormatter(DumbWriter(out, 100)))
        self.feed(data)
        self.close()
    finally:
        # Flush and release the file even if feed()/close() raised.
        out.close()
def __init__(self, require_link_target=False):
    """Set up the output list, the open-tag stack and the filter policy.

    The base parser is given an ``AbstractFormatter`` wrapping a
    ``NullWriter``.  ``require_link_target`` is stored on the instance; it
    indicates whether links need to have a target attribute.
    """
    HTMLParser.__init__(self, AbstractFormatter(NullWriter()))
    self.result = []
    self.open_tags = []

    # Whitelisted tags.  Extend with care -- "script", for instance, must
    # never be added.  Note that 'img' is on this list, and that 'ul',
    # 'ol' and 'li' are listed twice.
    self.permitted_tags = (
        ['a', 'b', 'br', 'em', 'i', 'li', 'ol', 'ul', 'p', 'strong', 'u', 'div']
        + ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
        + ['blockquote', 'q', 'cite', 'code', 'samp', 'kbd', 'var', 'dfn']
        + ['address', 'big', 'small', 'ins', 'del', 'acronym', 'abbr']
        + ['strike', 's', 'sub', 'sup', 'tt', 'pre', 'center']
        + ['font', 'basefont', 'multicol', 'spacer', 'layer', 'ilayer', 'nolayer']
        + ['img', 'map', 'area', 'param', 'hr', 'nobr', 'wbr']
        + ['ul', 'ol', 'li', 'dl', 'dt', 'dd', 'menu', 'dir']
        + ['form', 'input', 'button', 'label', 'select', 'option', 'optgroup']
        + ['textarea', 'fieldset', 'legend']
        + ['table', 'tr', 'td', 'th', 'tbody', 'tfoot', 'thead', 'caption']
        + ['col', 'colgroup', 'span']
    )

    # Tags that need no closing tag.
    self.requires_no_close = ['img', 'br']

    # Attributes allowed per tag; a tag without an entry is allowed no
    # attributes.  Do not add "on..." handlers such as "onhover", and be
    # very careful with "background" and "style".
    policy = dict(
        a=['href', 'target', 'rel'],
        p=['align'],
        img=['src', 'alt', 'border', 'title', "class"],
        table=['cellpadding', 'cellspacing', 'border', 'width', 'height'],
        font=['size', 'face', 'color', 'style', 'class'],
        span=['style'],
        h3=['style'],
    )
    # A new list is built on every iteration, so td and th do not share one.
    for cell_tag in ('td', 'th'):
        policy[cell_tag] = ['rowspan', 'colspan', 'width', 'height']
    self.allowed_attributes = policy

    # Schemes allowed in URLs (href and src attributes); the empty string
    # is included.  "javascript" and "vbscript" must stay off this list.
    self.allowed_schemes = ['http', 'https', 'ftp', 'irc', 'mailto', '']

    # Whether links need to have a target attribute.
    self.require_link_target = require_link_target
def __init__(self, adder):
    """Initialise the base parser and keep a reference to ``adder``.

    The base initialiser receives a ``formatter.NullFormatter`` instance
    and 0 as its second argument.
    """
    null_formatter = formatter.NullFormatter()
    HTMLParser.__init__(self, null_formatter, 0)
    self.adder = adder
def __init__(self):
    """Set up the base parser, the result list and the no-close tag list.

    The base parser is given an ``AbstractFormatter`` wrapping a
    ``NullWriter``.
    """
    null_writer = NullWriter()
    HTMLParser.__init__(self, AbstractFormatter(null_writer))
    self.result = list()
    # Tags that need no closing tag.
    self.requires_no_close = ['img', 'br']
def __init__(self, allow_refs=False):
    """Set up the base parser, the result list and the ``allow_refs`` flag.

    The base parser is given an ``AbstractFormatter`` wrapping a
    ``NullWriter``; ``allow_refs`` is stored unchanged.
    """
    null_writer = NullWriter()
    HTMLParser.__init__(self, AbstractFormatter(null_writer))
    self.result = list()
    self.allow_refs = allow_refs