Ejemplos de clean_html en Python

Lenguaje de programación: Python

Namespace/Package Name: app.loaders.helpers

Método / Función: clean_html

Ejemplos en hotexamples.com: 12

Python clean_html - 12 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de app.loaders.helpers.clean_html extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Ejemplo n.º 1

Mostrar archivo

def _get_content(html):
    """Return the article text: cleaned description followed by story body.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``find``.

    Returns:
        The result of ``clean_html`` for the ``p.description`` element
        concatenated with the result for the ``div.story-content`` element.
        A section that is absent from the page contributes an empty string.
    """
    description = html.find('p', {'class': 'description'})
    story = html.find('div', {'class': 'story-content'})

    # ``find`` yields None when nothing matches; stringifying that would leak
    # the literal text "None" into the article, so missing sections are
    # skipped instead of being passed to clean_html.
    description_text = ''
    if description is not None:
        description_text = clean_html(str(description))

    story_text = ''
    if story is not None:
        story_text = clean_html(str(story))

    return description_text + story_text

Ejemplo n.º 2

Mostrar archivo

def _get_date(html):
    """Extract the publication date and return it as ``YYYY-MM-DD``.

    The date text is taken from the ``<span>`` elements found inside the
    ``p.author-name`` paragraphs. When that produces nothing, the
    ``div.publishDate`` element is used instead. The cleaned text must match
    the ``"%d %B %Y"`` format, otherwise ``strptime`` raises ``ValueError``.

    Args:
        html: Parsed document exposing BeautifulSoup-style ``find``/``findAll``.

    Returns:
        The date formatted with ``"%Y-%m-%d"``.
    """
    author_blocks = html.findAll('p', {'class': 'author-name'})

    # The stringified result set is parsed again so that the spans holding
    # the posted date can be collected from it.
    spans = BeautifulSoup(str(author_blocks), 'lxml').find_all('span')

    date_text = clean_html(' '.join(str(span) for span in spans))
    if not date_text:
        fallback = html.find('div', {'class': 'publishDate'})
        # NOTE(review): joining the element directly iterates over it, which
        # presumably only works when the div exists and holds plain text --
        # confirm against real pages.
        date_text = clean_html(' '.join(fallback))

    parsed = datetime.datetime.strptime(date_text, "%d %B %Y")
    return parsed.strftime("%Y-%m-%d")

Ejemplo n.º 3

Mostrar archivo

Archivo: wired.py Proyecto: jamo95/Newsy

def _get_date(html):
    """Build a ``year-month-day`` string from the ``time.date-mdy`` element.

    The element is expected to carry the date as ``mm.dd.yy``; the two-digit
    year is prefixed with ``"20"``.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``find``.

    Returns:
        Whatever ``clean_html`` returns for the reassembled date string.

    Raises:
        IndexError: If the stringified element has fewer than three
            dot-separated pieces.
    """
    time_tag = html.find('time', {'class': 'date-mdy'})

    # The whole stringified element is split, not just its text, so the
    # outer pieces can still carry markup. NOTE(review): the final
    # clean_html call is presumably what strips it -- confirm.
    pieces = str(time_tag).split('.')
    month, day, short_year = pieces[0], pieces[1], pieces[2]

    return clean_html("-".join(("20" + short_year, month, day)))

Ejemplo n.º 4

Mostrar archivo

Archivo: wired.py Proyecto: jamo95/Newsy

def _get_content(html):
    """Return the cleaned paragraphs of the article's first ``<div>``.

    The ``<article>`` element is stringified and parsed again, then every
    ``<p>`` under the first ``<div>`` of that fragment is collected.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``find``.

    Returns:
        The result of ``clean_html`` applied to the space-joined paragraphs.
    """
    article_markup = str(html.find('article'))
    first_div = BeautifulSoup(article_markup, 'lxml').find('div')
    paragraphs = first_div.findAll('p')

    return clean_html(' '.join(str(paragraph) for paragraph in paragraphs))

Ejemplo n.º 5

Mostrar archivo

def _get_content(html):
    """Return the cleaned paragraphs found in ``div.article-content``.

    The container element is stringified and parsed again before its ``<p>``
    elements are collected.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``find``.

    Returns:
        The result of ``clean_html`` applied to the space-joined paragraphs.
    """
    container_markup = str(html.find('div', {'class': 'article-content'}))
    paragraphs = BeautifulSoup(container_markup, 'lxml').findAll('p')

    joined = ' '.join(str(paragraph) for paragraph in paragraphs)
    return clean_html(joined)

Ejemplo n.º 6

Mostrar archivo

def _get_content(html):
    """Return the cleaned paragraphs of the article text container.

    ``div.article-text-update`` is preferred; when that lookup gives a falsy
    result, the ``div`` with class ``article-text text-merri`` is used.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``find``.

    Returns:
        The result of ``clean_html`` applied to the space-joined paragraphs.
    """
    container = (
        html.find('div', {'class': 'article-text-update'})
        or html.find('div', {'class': 'article-text text-merri'})
    )

    # The container is stringified and parsed again before the paragraphs
    # are collected from it.
    paragraphs = BeautifulSoup(str(container), 'lxml').findAll('p')

    return clean_html(' '.join(str(paragraph) for paragraph in paragraphs))

Ejemplo n.º 7

Mostrar archivo

Archivo: hackernoon.py Proyecto: jamo95/Newsy

def _get_content(html):
    """Return the cleaned markup of every ``div.section-content`` element.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``findAll``.

    Returns:
        The result of ``clean_html`` applied to the space-joined sections.
    """
    sections = html.findAll('div', {'class': 'section-content'})
    return clean_html(' '.join(str(section) for section in sections))

Ejemplo n.º 8

Mostrar archivo

def _get_date(html):
    """Return the cleaned ``time.the-time`` element of the page.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``find``.

    Returns:
        The result of ``clean_html`` applied to the stringified element.
    """
    # NOTE(review): a missing element is stringified as "None" here.
    time_markup = str(html.find('time', {'class': 'the-time'}))
    return clean_html(time_markup)

Ejemplo n.º 9

Mostrar archivo

def _get_title(html):
    """Return the cleaned ``h1.article-title`` element of the page.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``find``.

    Returns:
        The result of ``clean_html`` applied to the stringified element.
    """
    # NOTE(review): a missing element is stringified as "None" here.
    heading_markup = str(html.find('h1', {'class': 'article-title'}))
    return clean_html(heading_markup)

Ejemplo n.º 10

Mostrar archivo

def _get_content(html):
    """Return the cleaned children of the first element matching ``.text``.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``select``.

    Returns:
        The result of ``clean_html`` applied to the space-joined children.

    Raises:
        IndexError: If no element matches the ``.text`` selector.
    """
    first_match = html.select('.text')[0]
    return clean_html(' '.join(str(node) for node in first_match.contents))

Ejemplo n.º 11

Mostrar archivo

Archivo: wired.py Proyecto: jamo95/Newsy

def _get_title(html):
    """Return the cleaned ``h1.title`` element of the page.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``find``.

    Returns:
        The result of ``clean_html`` applied to the stringified element.
    """
    # NOTE(review): a missing element is stringified as "None" here.
    heading_markup = str(html.find('h1', {'class': 'title'}))
    return clean_html(heading_markup)

Ejemplo n.º 12

Mostrar archivo

def _get_title(html):
    """Return the cleaned ``h1.story-headline`` element of the page.

    Args:
        html: Parsed document exposing a BeautifulSoup-style ``find``.

    Returns:
        The result of ``clean_html`` applied to the stringified element.
    """
    # NOTE(review): a missing element is stringified as "None" here.
    headline_markup = str(html.find('h1', {'class': 'story-headline'}))
    return clean_html(headline_markup)