コード例 #1
0
ファイル: strings.py プロジェクト: AlexUlrich/digsby
def strip_html_and_tags(s, invalid_tags):
    '''
    content between "invalid_tags" is removed
    '''
    if not s: return s

    from util.BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup(s.replace('<br>','\n').replace('<br/>','\n').replace('<br />', '\n'))
    for tag in invalid_tags:
        for result in soup.findAll(name=tag):
            result.replaceWith("")

    return ''.join(e for e in soup.recursiveChildGenerator()
                   if isinstance(e,unicode))
コード例 #2
0
def strip_html_and_tags(s, invalid_tags):
    '''
    content between "invalid_tags" is removed
    '''
    if not s: return s

    from util.BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup(
        s.replace('<br>', '\n').replace('<br/>', '\n').replace('<br />', '\n'))
    for tag in invalid_tags:
        for result in soup.findAll(name=tag):
            result.replaceWith("")

    return ''.join(e for e in soup.recursiveChildGenerator()
                   if isinstance(e, unicode))
コード例 #3
0
def strip_html2(s):
    '''
    Strips out HTML with the BeautifulSoup library.

    >>> strip_html2('<html><body><b>Some <i>ugly</i></b> html.</body></html>')
    u'Some ugly html.'
    '''
    if not s: return s

    from util.BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup(s)

    text_pieces = []
    for pc in soup.recursiveChildGenerator():
        if isinstance(pc, unicode):
            text_pieces.append(pc)
        elif pc.name == 'br':
            text_pieces.append('\n')

    return ''.join(text_pieces)
コード例 #4
0
ファイル: strings.py プロジェクト: AlexUlrich/digsby
def strip_html2(s):
    '''
    Strips out HTML with the BeautifulSoup library.

    >>> strip_html2('<html><body><b>Some <i>ugly</i></b> html.</body></html>')
    u'Some ugly html.'
    '''
    if not s: return s

    from util.BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup(s)

    text_pieces = []
    for pc in soup.recursiveChildGenerator():
        if isinstance(pc, unicode):
            text_pieces.append(pc)
        elif pc.name == 'br':
            text_pieces.append('\n')

    return ''.join(text_pieces)