Example #1
 def __init__(self,
              stream,
              encoding=None,
              parseMeta=True,
              useChardet=True,
              lowercaseElementName=False,
              lowercaseAttrName=False):
     # Change case matching defaults as we only output lowercase html anyway
     # This solution doesn't seem ideal...
     HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
                            lowercaseElementName, lowercaseAttrName)
Example #2
 def __init__(self,
              stream,
              encoding=None,
              parseMeta=True,
              useChardet=True,
              lowercaseElementName=True,
              lowercaseAttrName=True,
              **kwargs):
     # Default to lowercased element and attribute names and forward any extra
     # keyword arguments to the base tokenizer.
     HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
                            lowercaseElementName, lowercaseAttrName,
                            **kwargs)
Example #3
 def __iter__(self):
     # Run each token through sanitize_token and drop any it rejects.
     for token in HTMLTokenizer.__iter__(self):
         token = self.sanitize_token(token)
         if token:
             yield token
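Overrides like those in Examples #2 and #3 normally sit on a subclass that combines the tokenizer with a sanitizing mixin. A minimal sketch of that surrounding context, assuming the pre-1.0 html5lib module layout in which html5lib.sanitizer.HTMLSanitizerMixin supplies sanitize_token (the layout changed in html5lib 1.0, so these imports only apply to older releases):

 from html5lib.tokenizer import HTMLTokenizer        # html5lib < 1.0 module path
 from html5lib.sanitizer import HTMLSanitizerMixin   # supplies sanitize_token in html5lib < 1.0


 class SanitizingTokenizer(HTMLTokenizer, HTMLSanitizerMixin):
     """Tokenizer that filters every token through the sanitizer mixin."""

     def __iter__(self):
         for token in HTMLTokenizer.__iter__(self):
             # sanitize_token returns a (possibly rewritten) token, or a falsy
             # value for tokens that should be dropped entirely.
             token = self.sanitize_token(token)
             if token:
                 yield token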
Example #4
 def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
              lowercaseElementName=True, lowercaseAttrName=True):
     # Delegate to the base tokenizer with lowercasing enabled by default.
     HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
                            lowercaseElementName, lowercaseAttrName)
Example #5
File: html.py Project: tantalor/emend
 def __iter__(self):
   # Yield only tokens that are not markup; tag_types is defined elsewhere
   # in the emend project.
   for token in HTMLTokenizer.__iter__(self):
     if token["type"] not in tag_types:
       yield token
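A rough, self-contained illustration of the same filtering idea follows, with an assumed tag_types set: very old html5lib releases emitted string token types, while later releases use the integer constants in html5lib.constants.tokenTypes, so the set shown here is only a placeholder.

 from html5lib.tokenizer import HTMLTokenizer  # html5lib < 1.0 module path

 # Assumed set of markup token types; adjust to match the html5lib version in use.
 tag_types = set(["StartTag", "EndTag", "EmptyTag"])


 class TextOnlyTokenizer(HTMLTokenizer):
     def __iter__(self):
         # Drop tag tokens and keep everything else (characters, comments, doctype).
         for token in HTMLTokenizer.__iter__(self):
             if token["type"] not in tag_types:
                 yield token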
Example #6
 def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
              lowercaseElementName=False, lowercaseAttrName=False):
     #Change case matching defaults as we only output lowercase html anyway
     #This solution doesn't seem ideal...
     HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
                            lowercaseElementName, lowercaseAttrName)
Example #7
 def parse(self, stream, output=True):
     # Tokenize the stream and optionally print each token as it is produced.
     tokenizer = HTMLTokenizer(stream)
     for token in tokenizer:
         if output:
             print(token)
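Example #7 is a method extract; a minimal standalone version of the same idea is sketched below. The wrapper class name is hypothetical and the old-style import path is assumed.

 from html5lib.tokenizer import HTMLTokenizer  # html5lib < 1.0 module path


 class TokenDumper(object):
     def parse(self, stream, output=True):
         # HTMLTokenizer accepts file-like objects as well as plain strings.
         tokenizer = HTMLTokenizer(stream)
         for token in tokenizer:
             if output:
                 print(token)


 TokenDumper().parse("<p>Hello, <b>world</b></p>")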
Example #8
File: sanitizer.py Project: calvin/bleach
 def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
              lowercaseElementName=True, lowercaseAttrName=True, **kwargs):
     HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
                            lowercaseElementName, lowercaseAttrName,
                            **kwargs)
     # Collect tags that have not yet been matched by a counterpart.
     self.unmatched_tags = []
Example #9
 def reset(self):
     # Start over with an empty in-memory stream and a tokenizer bound to it.
     self.src = StringIO()
     self.tokenizer = HTMLTokenizer(self.src)
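Example #9 relies on StringIO and HTMLTokenizer being imported in the surrounding module; a sketch of that missing context follows (the class name and the __init__ wiring are assumptions, not part of the original project):

 from io import StringIO  # Python 2 code of this era would import StringIO.StringIO

 from html5lib.tokenizer import HTMLTokenizer  # html5lib < 1.0 module path


 class TokenizerHarness(object):
     def __init__(self):
         self.reset()

     def reset(self):
         # Start over with an empty in-memory stream and a fresh tokenizer bound to it.
         self.src = StringIO()
         self.tokenizer = HTMLTokenizer(self.src)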