Python clean_html Examples

Programming Language: Python

Namespace/Package Name: app.loaders.helpers

Method/Function: clean_html

Examples at hotexamples.com: 12

Python clean_html - 12 examples found. These are the top-rated real-world Python examples of app.loaders.helpers.clean_html, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def _get_content(html):
    """Return the article description followed by the story body.

    Looks up ``<p class="description">`` and ``<div class="story-content">``
    in *html*, passes the markup of each through ``clean_html`` and returns
    the two results concatenated (description first, no separator).

    A part that is not present in the document contributes an empty string.
    """
    def _cleaned(tag_name, css_class):
        element = html.find(tag_name, {'class': css_class})
        # find() yields None for a missing element; str(None) would inject
        # the literal text "None" into the returned article.
        if element is None:
            return ''
        return clean_html(str(element))

    description = _cleaned('p', 'description')
    body = _cleaned('div', 'story-content')
    return description + body

Example #2

Show file

def _get_date(html):
    """Return the article's publication date formatted as ``YYYY-MM-DD``.

    The date text is taken from the ``<span>`` elements found inside the
    ``<p class="author-name">`` paragraphs. When that yields nothing, the
    ``<div class="publishDate">`` element is used instead.

    Raises:
        ValueError: if neither location is present in the document, or if
            the extracted text does not match the ``"%d %B %Y"`` format.
    """
    # Paragraphs carrying the author name and the posted date.
    author_paragraphs = html.findAll('p', {'class': 'author-name'})

    # Re-parse the serialised result set so the spans of every matching
    # paragraph can be collected with a single query.
    author_soup = BeautifulSoup(str(author_paragraphs), 'lxml')
    spans = author_soup.find_all('span')

    date = clean_html(' '.join(map(str, spans)))
    if not date:
        fallback = html.find('div', {'class': 'publishDate'})
        if fallback is None:
            raise ValueError('no publication date found in document')
        # The children of a tag can be nested tags as well as plain strings;
        # str.join() only accepts strings, so serialise each child first.
        date = clean_html(' '.join(map(str, fallback.contents)))

    parsed = datetime.datetime.strptime(date, "%d %B %Y")
    return parsed.strftime("%Y-%m-%d")

Example #3

Show file

File: wired.py Project: jamo95/Newsy

def _get_date(html):
    """Return the article's publication date formatted as ``YYYY-MM-DD``.

    The ``<time class="date-mdy">`` element carries the date as ``mm.dd.yy``;
    the two-digit year is prefixed with ``"20"``.

    Raises:
        IndexError: if the element's text has fewer than three
            dot-separated fields (this includes a missing element).
    """
    date = html.find('time', {'class': 'date-mdy'})

    # Remove the markup *before* splitting: splitting the serialised tag would
    # also split on any '.' inside an attribute value and shift the fields.
    # NOTE(review): assumes clean_html() strips the surrounding tag - confirm.
    text = clean_html(str(date))
    fields = [field.strip() for field in text.split('.')]

    # Source order is month, day, year.
    month = fields[0]
    day = fields[1]
    year = "20" + fields[2]
    return '-'.join((year, month, day))

Example #4

Show file

File: wired.py Project: jamo95/Newsy

def _get_content(html):
    """Return the article paragraphs after passing them through clean_html.

    Only the ``<p>`` elements inside the first ``<div>`` of the ``<article>``
    element are used.
    """
    # Serialise the <article> element and parse it again as its own document.
    article_markup = str(html.find('article'))
    first_div = BeautifulSoup(article_markup, 'lxml').find('div')
    paragraphs = first_div.findAll('p')

    # Join the markup of the paragraphs with single spaces, then clean it.
    joined_markup = ' '.join(str(paragraph) for paragraph in paragraphs)
    return clean_html(joined_markup)

Example #5

Show file

def _get_content(html):
    """Return the article body paragraphs after passing them through clean_html.

    Collects every ``<p>`` inside ``<div class="article-content">``, joins
    their markup with single spaces and hands the result to ``clean_html``.

    Returns an empty string when the container is not present.
    """
    container = html.find('div', {'class': 'article-content'})
    if container is None:
        # str(None) would be re-parsed as the literal text "None"; lxml is
        # expected to wrap such stray text in a <p>, leaking it into the result.
        return ''

    article_soup = BeautifulSoup(str(container), 'lxml')
    paragraphs = article_soup.findAll('p')
    return clean_html(' '.join(map(str, paragraphs)))

Example #6

Show file

def _get_content(html):
    """Return the article body paragraphs after passing them through clean_html.

    The body is looked up in ``<div class="article-text-update">`` first and
    in ``<div class="article-text text-merri">`` when the former is absent.
    Every ``<p>`` of the container is joined with single spaces and handed to
    ``clean_html``.

    Returns an empty string when neither container is present.
    """
    container = html.find('div', {'class': 'article-text-update'})
    if not container:
        container = html.find('div', {'class': 'article-text text-merri'})
    if container is None:
        # str(None) would be re-parsed as the literal text "None"; lxml is
        # expected to wrap such stray text in a <p>, leaking it into the result.
        return ''

    article_soup = BeautifulSoup(str(container), 'lxml')
    paragraphs = article_soup.findAll('p')
    return clean_html(' '.join(map(str, paragraphs)))

Example #7

Show file

File: hackernoon.py Project: jamo95/Newsy

def _get_content(html):
    """Return all ``section-content`` divs, space-joined, via clean_html."""
    sections = html.findAll('div', {'class': 'section-content'})
    section_markup = [str(section) for section in sections]
    return clean_html(' '.join(section_markup))

Example #8

Show file

def _get_date(html):
    """Return the ``<time class="the-time">`` element passed through clean_html.

    Returns an empty string when the element is not present.
    """
    date = html.find('time', {'class': 'the-time'})
    if date is None:
        # find() yields None for a missing element; str(None) would otherwise
        # be reported as the date text "None".
        return ''
    return clean_html(str(date))

Example #9

Show file

def _get_title(html):
    """Return the ``<h1 class="article-title">`` element passed through clean_html.

    Returns an empty string when the element is not present.
    """
    title = html.find('h1', {'class': 'article-title'})
    if title is None:
        # find() yields None for a missing element; str(None) would otherwise
        # be reported as the title "None".
        return ''
    return clean_html(str(title))

Example #10

Show file

def _get_content(html):
    """Return the children of the first ``.text`` element via clean_html.

    The markup of each direct child is joined with single spaces before
    being handed to ``clean_html``.
    """
    first_match = html.select('.text')[0]
    child_markup = (str(child) for child in first_match.contents)
    return clean_html(' '.join(child_markup))

Example #11

Show file

File: wired.py Project: jamo95/Newsy

def _get_title(html):
    """Return the ``<h1 class="title">`` element passed through clean_html.

    Returns an empty string when the element is not present.
    """
    title = html.find('h1', {'class': 'title'})
    if title is None:
        # find() yields None for a missing element; str(None) would otherwise
        # be reported as the title "None".
        return ''
    return clean_html(str(title))

Example #12

Show file

def _get_title(html):
    """Return the ``<h1 class="story-headline">`` element passed through clean_html.

    Returns an empty string when the element is not present.
    """
    title = html.find('h1', {'class': 'story-headline'})
    if title is None:
        # find() yields None for a missing element; str(None) would otherwise
        # be reported as the title "None".
        return ''
    return clean_html(str(title))