def __iter__(self): for token in HTMLTokenizer.__iter__(self): token = self.sanitize_token(token) if token: yield token
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True, lowercaseElementName=True, lowercaseAttrName=True, **kwargs): HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet, lowercaseElementName, lowercaseAttrName, **kwargs)