def __iter__(self):
    """Yield every token from ``Filter.__iter__``, clobbering attribute values.

    For tokens of type ``StartTag`` or ``EmptyTag`` whose ``data`` mapping is
    non-empty, each attribute value is overwritten in place with ``"moo"``.
    Every token, modified or not, is yielded.
    """
    tag_types = ["StartTag", "EmptyTag"]
    for token in Filter.__iter__(self):
        if token["type"] in tag_types and token["data"]:
            attributes = token["data"]
            # Only existing keys are reassigned, so the mapping keeps its
            # size and can safely be iterated while being updated.
            for name in attributes:
                attributes[name] = "moo"
        yield token
def __iter__(self):
    """Pass tokens through, replacing every tag attribute value with 'moo'.

    Only ``StartTag`` / ``EmptyTag`` tokens with a non-empty ``data`` mapping
    are touched; the mapping is mutated in place. All tokens produced by
    ``Filter.__iter__`` are yielded.
    """
    for token in Filter.__iter__(self):
        carries_attrs = token['type'] in ['StartTag', 'EmptyTag'] and token['data']
        if carries_attrs:
            attrs = token['data']
            # Snapshot the keys, then overwrite each value in place.
            for key in list(attrs):
                attrs[key] = 'moo'
        yield token
def __iter__(self):
    """Yield tokens from ``Filter.__iter__``, dropping relative ``href`` values.

    On ``StartTag`` / ``EmptyTag`` tokens with a non-empty ``data`` mapping,
    the un-namespaced ``href`` attribute is deleted when its value contains
    no ``':'``. Every token is yielded, with or without that attribute.
    """
    # Key for an href attribute that has no namespace.
    href_key = (None, 'href')
    for token in Filter.__iter__(self):
        if token['type'] in ['StartTag', 'EmptyTag'] and token['data']:
            attrs = token['data']
            # A value without ':' is treated as a relative link and removed.
            if href_key in attrs and ':' not in attrs[href_key]:
                del attrs[href_key]
        yield token
def __iter__(self):
    """Yield tokens from ``Filter.__iter__``, rerouting ``src`` attributes.

    On ``StartTag`` / ``EmptyTag`` tokens with a non-empty ``data`` mapping,
    each attribute whose key has ``'src'`` at index 1 gets its value replaced
    by ``'/safeImage?url='`` followed by the ``quote_plus``-encoded original.
    Every token is yielded.
    """
    for token in Filter.__iter__(self):
        if token['type'] in ['StartTag', 'EmptyTag'] and token['data']:
            attrs = token['data']
            # NOTE(review): keys look like (namespace, name) tuples - confirm.
            for key, original in attrs.items():
                if key[1] != 'src':
                    continue
                encoded = urllib.parse.quote_plus(original)
                attrs[key] = '/safeImage?url=' + encoded
        yield token
def __iter__(self):
    """Yield tokens from ``Filter.__iter__``, normalising ``href`` attributes.

    On ``StartTag`` / ``EmptyTag`` tokens with a non-empty ``data`` mapping,
    each attribute whose key has ``'href'`` at index 1 is rewritten:

    1. a leading ``'//'`` gets ``'https:'`` prepended,
    2. the value is percent-decoded with ``urllib.parse.unquote``,
    3. every ``"`` is replaced by ``%22``.

    Every token is yielded.
    """
    for token in Filter.__iter__(self):
        if token['type'] in ['StartTag', 'EmptyTag'] and token['data']:
            attrs = token['data']
            # NOTE(review): keys look like (namespace, name) tuples - confirm.
            for key in attrs:
                if key[1] != 'href':
                    continue
                link = attrs[key]
                if link.startswith('//'):
                    link = 'https:' + link
                link = urllib.parse.unquote(link)
                # Re-escape double quotes that unquoting may have exposed.
                attrs[key] = re.sub('"', '%22', link)
        yield token
def __iter__(self):
    """Yield tokens from ``Filter.__iter__``, stripping anchors with no valid href.

    An ``a`` ``StartTag`` is dropped unless it has an un-namespaced ``href``
    that is non-empty and accepted by ``is_valid_url``. After a start tag is
    dropped, the next ``a`` ``EndTag`` is dropped as well. All other tokens
    are yielded unchanged.
    """
    skip_closing = False
    for token in Filter.__iter__(self):
        # Only anchor tags are inspected.
        is_anchor = 'name' in token and token['name'] == 'a'
        if is_anchor and token['type'] == 'StartTag':
            has_valid_href = any(
                key == (None, 'href') and val != '' and is_valid_url(val)
                for key, val in token['data'].items()
            )
            skip_closing = not has_valid_href
            if skip_closing:
                continue
        elif is_anchor and token['type'] == 'EndTag' and skip_closing:
            # This end tag pairs with the start tag that was just dropped.
            skip_closing = False
            continue
        yield token
def __iter__(self):
    """Filter out ``a`` tags that lack a usable ``href``.

    Tokens come from ``Filter.__iter__``. An anchor ``StartTag`` is kept only
    when its un-namespaced ``href`` is non-empty and ``is_valid_url`` accepts
    it. Otherwise the start tag is dropped, along with the next anchor
    ``EndTag``. Non-anchor tokens are always yielded.
    """
    drop_next_close = False
    for token in Filter.__iter__(self):
        # Anything that is not an anchor tag passes straight through.
        if 'name' not in token or token['name'] != 'a':
            yield token
            continue

        kind = token['type']
        if kind == 'StartTag':
            # Assume the anchor is to be dropped until a valid href is found.
            drop_next_close = True
            for key, val in token['data'].items():
                if key != (None, 'href'):
                    continue
                if val != '' and is_valid_url(val):
                    drop_next_close = False
            if drop_next_close:
                continue
        elif kind == 'EndTag' and drop_next_close:
            # Closing tag of the anchor that was just dropped.
            drop_next_close = False
            continue
        yield token