Beispiel #1
0
def add_article(source_id, entry):
    """Store a feed entry as a ``FeedArticle`` unless it is already stored.

    An entry counts as already stored when a ``FeedArticle`` with the same
    ``link`` and ``source_id`` exists; in that case nothing is written.

    The thumbnail URL is taken from the first source that yields one:

    1. the first item of ``entry['media_thumbnail']``,
    2. the first item of ``entry['links']`` whose ``type`` starts with
       ``'image'`` and which has an ``href``,
    3. ``get_thumbnail_url_from_summary`` applied to the summary HTML
       (only attempted when the plain-text summary is non-empty).

    Args:
        source_id: Identifier of the feed source the entry belongs to.
        entry: Feed entry supporting attribute access (``link``, ``title``,
            ``summary``) as well as ``in`` / ``[]`` lookups
            (presumably a feedparser entry -- confirm against the caller).

    Returns:
        None. The new article is added to ``cdb.session`` and committed.
    """
    if FeedArticle.query.filter_by(link=entry.link, source_id=source_id).first():
        return

    # Keep both forms: plain text for ``summary``, raw markup for ``html``.
    summary = BeautifulSoup(entry.summary, 'lxml').get_text()
    article = FeedArticle(
        link=entry.link,
        title=entry.title,
        summary=summary,
        source_id=source_id,
        html=entry.summary)

    if 'media_thumbnail' in entry:
        thumbnails = entry['media_thumbnail']
        # Guard against an empty list or a thumbnail without a URL so one
        # malformed entry falls through to the other sources instead of
        # raising IndexError/KeyError and aborting the import.
        if thumbnails and 'url' in thumbnails[0]:
            article.thumbnail_url = thumbnails[0]['url']

    if not article.thumbnail_url and 'links' in entry:
        for link in entry['links']:
            is_image = 'type' in link and link['type'].startswith('image')
            if is_image and 'href' in link:
                article.thumbnail_url = link['href']
                break

    # Last resort: look for a thumbnail inside the summary markup itself.
    if article.summary and not article.thumbnail_url:
        article.thumbnail_url = get_thumbnail_url_from_summary(article.html)

    cdb.session.add(article)
    cdb.session.commit()
Beispiel #2
0
def add_article(article):
    """Persist ``article``, filling in a missing thumbnail URL first.

    When the article has a truthy ``summary`` but no ``thumbnail_url``, the
    URL is obtained by passing ``article.html`` to
    ``get_thumbnail_url_from_summary``. The article is then added to
    ``cdb.session`` and the session is committed.

    Args:
        article: Object exposing ``summary``, ``thumbnail_url`` and ``html``
            attributes that ``cdb.session`` can persist.

    Returns:
        None.
    """
    has_summary = bool(article.summary)
    if has_summary and not article.thumbnail_url:
        thumbnail = get_thumbnail_url_from_summary(article.html)
        article.thumbnail_url = thumbnail

    # NOTE: a further fallback via ``get_thumbnail_url_from_html`` existed
    # here only as commented-out code and is intentionally not active.

    db_session = cdb.session
    db_session.add(article)
    db_session.commit()