Python Article.body Examples

Programming Language: Python

Namespace/Package Name: newsroom.models

Class/Type: Article

Method/Function: body

Examples at hotexamples.com: 6

Python Article.body - 6 examples found. These are the top rated real world Python examples of newsroom.models.Article.body extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Article(12)

category(12)

slug(12)

title(12)

save(12)

published(8)

author_01(6)

author_02(5)

body(4)

external_primary_image(4)

publish_now(3)

byline(2)

created(2)

author_03(2)

pk(2)

primary_image_caption(2)

primary_image_size(2)

summary_text(2)

subtitle(1)

is_published(1)

include_in_rss(1)

exclude_from_list_views(1)

disqus_id(1)

topics(1)

Example #1

Show file

    def setUpTestData(cls):
        cls.client = Client()

        topic = Topic()
        topic.name = "government"
        topic.slug = "government"
        topic.save()

        category = Category()
        category.name = "Feature"
        category.slug = "feature"
        category.save()

        category = Category()
        category.name = "Photo essay"
        category.slug = "photo-essay"
        category.save()

        category = Category()
        category.name = "Opinion"
        category.slug = "opinion"
        category.save()

        category = Category()
        category.name = "Photo"
        category.slug = "photo"
        category.save()

        category = Category()
        category.name = "News"
        category.slug = "news"
        category.save()

        a = Article()
        a.title = "Test article 1"
        a.body = "<p>The quick brown fox jumps over the lazy dog.</p>"
        a.slug = "test-article-1"
        a.category = Category.objects.get(name="News")
        a.external_primary_image = \
            "http://www.w3schools.com/html/pic_mountain.jpg"
        a.save()
        a.publish_now()

        a = Article()
        a.title = "Test article 2"
        a.subtitle = "Dogs and things"
        a.body = "<p>How now brown cow.</p>"
        a.slug = "test-article-2"
        a.category = Category.objects.get(slug="opinion")
        a.save()
        a.publish_now()

        author = Author()
        author.first_names = "Joe"
        author.last_name = "Bloggs"
        author.email = "*****@*****.**"
        author.save()
        a.author_01 = author
        a.save()

Example #2

Show file

File: tests.py Project: danielsinaga1/djangonewsfix

    def setUpTestData(cls):
        republisher = Republisher()
        republisher.name = "NYT"
        republisher.email_addresses = "[email protected],[email protected]"
        republisher.message = "Hi. Please republish this."
        republisher.save()

        topic = Topic()
        topic.name = "government"
        topic.slug = "government"
        topic.save()

        category = Category()
        category.name = "Feature"
        category.slug = "feature"
        category.save()

        category = Category()
        category.name = "Photo essay"
        category.slug = "photo-essay"
        category.save()

        category = Category()
        category.name = "Opinion"
        category.slug = "opinion"
        category.save()

        category = Category()
        category.name = "Photo"
        category.slug = "photo"
        category.save()

        category = Category()
        category.name = "News"
        category.slug = "news"
        category.save()

        article = Article()
        article.title = "Test article 1"
        article.body = "<p>The quick brown fox jumps over the lazy dog.</p>"
        article.slug = "test-article-1"
        article.category = Category.objects.get(name="News")
        article.external_primary_image = \
            "http://www.w3schools.com/html/pic_mountain.jpg"
        article.save()
        article.publish_now()

        republisherArticle  = RepublisherArticle()
        republisherArticle.article = article
        republisherArticle.republisher = republisher
        republisherArticle.save()

Example #3

Show file

File: exportfromdrupal.py Project: danielsinaga1/djangonewsfix

def process(start, finish, html_file):
    soup = BeautifulSoup(open(html_file), "lxml")
    articles = soup.find_all("div")
    print("Number of articles", len(articles))
    if finish > len(articles):
        print("Truncating finish to ", len(articles))
        finish = len(articles)
    if start > finish:
        print("Start > finish")
        return

    articles = articles[start:finish]
    for idx, article in enumerate(articles):
        # Title, Link and date published
        link = article.find("a")["href"]
        link = 'http://gutest.nathangeffen.webfactional.com' + str(link)
        if link.find("/gallery/") == -1:
            print("Processing: ", idx + start, link)
        else:
            print("Ignoring: ", idx + start, link)
            continue

        newarticle = Article()
        newarticle.title = article.find("p",
                                        attrs={'class' : 'title'}).get_text(). \
            replace("â€™", "'").replace("â€œ",'"').replace("â€",'"')
        newarticle.slug = slugify(str(link).rpartition("/")[2])
        import datetime
        published = article.find('p', attrs={'class' : 'date'}).get_text()[2:] \
                    + " +0200"
        published_conv = datetime.datetime.strptime(published,
                                                    "%d/%m/%Y - %H:%M %z")
        newarticle.published = published_conv

        # Fetch article from web
        import urllib.request
        with urllib.request.urlopen(link) as response:
            html = response.read()
        soup = BeautifulSoup(html, "lxml")

        # Byline
        try:
            newarticle.byline = soup.find \
            ("div", { "class" : "article-author" }).get_text()
        except:
            print("Byline not found")

        # Intro
        try:
            intro = soup.find("div", {"class": "article-intro"})
            intro = intro.find("p")
            intro["class"] = "intro"
            newarticle.summary_text = str(intro)
        except:
            print("Intro not found")

        try:
            primary_image = soup.find("div", {"class" : "article-image"}). \
                            find("img")
            link = primary_image["src"]
            link = link.replace("http://gutest.nathangeffen.webfactional.com",
                                "http://groundup.org.za")
            newarticle.external_primary_image = link.replace(
                "/column_width/", "/article_image/")
            newarticle.primary_image_size = "LEAVE"
        except:
            print("Primary image not found")

        try:
            newarticle.primary_image_caption = primary_image["alt"]
        except:
            print("Primary image caption not found")

        # Body

        try:
            text = soup.find("div", {"class":"article-body"}). \
                   find("div", {"class":"field-item"})
        except:
            print("No text")
            text = ""

        body = str(intro) + str(text)  # "".join([str(item) for item in text])
        newarticle.body = body

        # category

        try:
            category = soup.find("div", attrs={
                'class': 'article-category'
            }).get_text()

            if category.lower() == "news":
                newarticle.category = "news"
            elif category.lower() == "featured story":
                newarticle.category = "featured story"
            elif category.lower() == "photo essay":
                newarticle.category = "photo essay"
            elif category.lower() == "photo":
                newarticle.category = "photo"
            elif category.lower() == "opinion":
                newarticle.category = "opinion"
            elif category.lower() == "brief":
                newarticle.category = "brief"
            elif category.lower() == "analysis":
                newarticle.category = "analysis"
            else:
                print("Unknown category: ", category)
        except:
            print("No category")

        # Topics
        try:
            topics = soup.find("div", {"class":"article-subject"}). \
                     get_text(", ")
        except:
            print("No topics")
            topics = ""
        try:
            tags = soup.find("div", {"class": "article-tags"}).get_text(", ")
        except:
            tags = ""

        if tags:
            if topics:
                topics += ", " + tags
            else:
                topics = tags

        topics_split = topics.split(",")
        if len(topics_split) > 8:
            topics = ", ".join(topics_split[0:8])
        newarticle.topics = topics

        # Saving
        try:
            article_to_replace = Article.objects.get(slug=newarticle.slug)
        except Article.DoesNotExist:
            print("Saving as new article")
            newarticle.save()
        else:
            print("Updating existing article")
            newarticle.pk = article_to_replace.pk
            newarticle.created = article_to_replace.created
            newarticle.save()

Example #4

Show file

File: exportfromdrupal.py Project: groundupnews/gu

def process(start, finish, html_file):
    soup = BeautifulSoup(open(html_file), "lxml")
    articles = soup.find_all("div")
    print("Number of articles", len(articles))
    if finish > len(articles):
        print("Truncating finish to ", len(articles))
        finish = len(articles)
    if start > finish:
        print("Start > finish")
        return

    articles = articles[start:finish]
    for idx,article in enumerate(articles):
        # Title, Link and date published
        link = article.find("a")["href"]
        link = 'http://gutest.nathangeffen.webfactional.com' + str(link)
        if link.find("/gallery/") == -1:
            print("Processing: ", idx + start, link)
        else:
            print("Ignoring: ", idx + start, link)
            continue

        newarticle = Article()
        newarticle.title = article.find("p",
                                        attrs={'class' : 'title'}).get_text(). \
            replace("â€™", "'").replace("â€œ",'"').replace("â€",'"')
        newarticle.slug = slugify(str(link).rpartition("/")[2])
        import datetime
        published = article.find('p', attrs={'class' : 'date'}).get_text()[2:] \
                    + " +0200"
        published_conv = datetime.datetime.strptime(published,
                                                    "%d/%m/%Y - %H:%M %z")
        newarticle.published = published_conv

        # Fetch article from web
        import urllib.request
        with urllib.request.urlopen(link) as response:
            html = response.read()
        soup = BeautifulSoup(html, "lxml")

        # Byline
        try:
            newarticle.byline = soup.find \
            ("div", { "class" : "article-author" }).get_text()
        except:
            print("Byline not found")

        # Intro
        try:
            intro = soup.find("div", {"class" : "article-intro"})
            intro = intro.find("p")
            intro["class"] = "intro"
            newarticle.summary_text = str(intro)
        except:
            print("Intro not found")

        try:
            primary_image = soup.find("div", {"class" : "article-image"}). \
                            find("img")
            link = primary_image["src"]
            link = link.replace("http://gutest.nathangeffen.webfactional.com",
                                "http://groundup.org.za")
            newarticle.external_primary_image = link.replace("/column_width/",
                                                             "/article_image/")
            newarticle.primary_image_size = "LEAVE"
        except:
            print("Primary image not found")

        try:
            newarticle.primary_image_caption = primary_image["alt"]
        except:
            print("Primary image caption not found")

        # Body

        try:
            text = soup.find("div", {"class":"article-body"}). \
                   find("div", {"class":"field-item"})
        except:
            print("No text")
            text = ""

        body = str(intro) + str(text) # "".join([str(item) for item in text])
        newarticle.body = body

        # category

        try:
            category = soup.find("div", attrs={'class' :
                                               'article-category'}).get_text()

            if category.lower() == "news":
                newarticle.category = "news"
            elif category.lower() == "featured story":
                newarticle.category = "featured story"
            elif category.lower() == "photo essay":
                newarticle.category = "photo essay"
            elif category.lower() == "photo":
                newarticle.category = "photo"
            elif category.lower() == "opinion":
                newarticle.category = "opinion"
            elif category.lower() == "brief":
                newarticle.category = "brief"
            elif category.lower() == "analysis":
                newarticle.category = "analysis"
            else:
                print("Unknown category: ", category)
        except:
            print("No category")

        # Topics
        try:
            topics = soup.find("div", {"class":"article-subject"}). \
                     get_text(", ")
        except:
            print("No topics")
            topics = ""
        try:
            tags = soup.find("div", {"class":"article-tags"}).get_text(", ")
        except:
            tags = ""

        if tags:
            if topics:
                topics += ", " + tags
            else:
                topics = tags

        topics_split = topics.split(",")
        if len(topics_split) > 8:
            topics = ", ".join(topics_split[0:8])
        newarticle.topics = topics

        # Saving
        try:
            article_to_replace = Article.objects.get(slug=newarticle.slug)
        except Article.DoesNotExist:
            print("Saving as new article")
            newarticle.save()
        else:
            print("Updating existing article")
            newarticle.pk = article_to_replace.pk
            newarticle.created = article_to_replace.created
            newarticle.save()

Example #5

Show file

File: missingcontent.py Project: groundupnews/gu

def process(start, finish, html_file):
    soup = BeautifulSoup(open(html_file), "lxml")

    articles = soup.find_all("div")
    # for article in articles:
    #     url = article.find("a")["href"]
    #     title = article.find("p", attrs={'class' : 'title'}).get_text()
    #     article_date = article.find("p", attrs={'class' : 'date'}).get_text()
    #     title = title.replace('\n', ' ').replace('\r', '')
    #     article_date = article_date.replace('\n', ' ').replace('\r', '')
    #     print( (url + " @ " + title + " | " + article_date).strip() )
    # return

    with open(html_file) as f:
        articles = f.readlines()
    print("Number of articles", len(articles))
    if finish > len(articles):
        print("Truncating finish to ", len(articles))
        finish = len(articles)
    if start > finish:
        print("Start > finish")
        return

    articles = articles[start:finish]

    for idx,line in enumerate(articles):
        link = line.partition(" @ ")[0]
        title = line.partition(" @ ")[2].partition(" | ")[0].strip()
        if len(title) > 200:
            title = title[0:200]
        date_time = line.partition(" | ")[2].strip() + " +0200"
        slug = slugify(link.rpartition("/")[2])
        new_link = "http://groundup.org.za/article/" + slug
        old_link = "http://gutest.nathangeffen.webfactional.com" + link

        html = ""

        newarticle = Article()
        newarticle.title = title
        newarticle.slug = slug
        import datetime
        newarticle.published = datetime.datetime.strptime(date_time,
                                                    "%d/%m/%Y - %H:%M %z")
        html = ""
        try:
            with urllib.request.urlopen(old_link) as response:
                html = response.read()
        except:
            print("CRITICAL: Can't find old link", old_link)
            continue
        html = str(html)
        pos = html.find('<div class="field field-name-field-where field-type-text field-label-above"><div class="field-label">Where is the event:&nbsp;</div>')

        if pos == -1:
            print("PROCESSING:", str(idx + start) + " " + link)
        else:
            print("IGNORING:", link)
            continue

        soup = BeautifulSoup(html, "lxml")

        # Byline
        try:
            newarticle.byline = soup.find \
            ("div", { "class" : "article-author" }).get_text()
        except:
            newarticle.byline = ""
            print("Byline not found")

        # Intro
        intro = ""
        try:
            intro = soup.find("div", {"class" : "article-intro"})
            intro = intro.find("p")
            intro["class"] = "intro"
            newarticle.summary_text = str(intro)
        except:
            intro = ""
            print("Intro not found")

        primary_image = ""
        try:
            primary_image = soup.find("div", {"class" : "article-image"}). \
                            find("img")
            link = primary_image["src"]
            link = link.replace("http://gutest.nathangeffen.webfactional.com",
                                "http://groundup.org.za")
            newarticle.external_primary_image = link.replace("/column_width/",
                                                             "/article_image/")
            newarticle.primary_image_size = "LEAVE"
            primary_image_found = True
        except:
            print("Primary image not found")
            primary_image_found = False

        try:
            newarticle.primary_image_caption = primary_image["alt"]
        except:
            try:
                newarticle.primary_image_caption = str(soup.find("div", \
                    {"class": "field-name-field-image-description"})).\
                    replace("\\n","").replace("\\'","'")
            except:
                newarticle.primary_image_caption = ""
                print("Primary image caption not found")

        if newarticle.summary_text == "" and primary_image_found == False:
            intro = newarticle.primary_image_caption
            newarticle.primary_image_caption = ""
            print("Swapping caption and intro")

        try:
            text = soup.find("div", {"class":"article-body"}). \
                   find("div", {"class":"field-item"})
        except:
            try:
                text = soup.find("div", {"class":"content"}). \
                       find("div", {"class":"field-item"})
            except:
                print("No text")
                text = ""

        if str(intro) != "None":
            body = str(intro) + str(text) # "".join([str(item) for item in text])
        else:
            body = str(text)

        newarticle.body = body.replace("\\n"," ")

        # category

        try:
            category = soup.find("div", attrs={'class' :
                                               'article-category'}).get_text()

            if category.lower() == "news":
                newarticle.category = "news"
            elif category.lower() == "featured story":
                newarticle.category = "featured story"
            elif category.lower() == "photo essay":
                newarticle.category = "photo essay"
            elif category.lower() == "photo story":
                newarticle.category = "photo essay"
            elif category.lower() == "photo":
                newarticle.category = "photo"
            elif category.lower() == "opinion":
                newarticle.category = "opinion"
            elif category.lower() == "brief":
                newarticle.category = "brief"
            elif category.lower() == "analysis":
                newarticle.category = "analysis"
            else:
                print("Unknown category: ", category)
        except:
            print("No category")

        # # Topics
        # try:
        #     topics = soup.find("div", {"class":"article-subject"}). \
        #              get_text(", ")
        # except:
        #     print("No topics")
        #     topics = ""
        # try:
        #     tags = soup.find("div", {"class":"article-tags"}).get_text(", ")
        # except:
        #     tags = ""

        # if tags:
        #     if topics:
        #         topics += ", " + tags
        #     else:
        #         topics = tags

        # topics_split = topics.split(",")
        # if len(topics_split) > 8:
        #     topics = ", ".join(topics_split[0:8])
        # newarticle.topics = topics

        # Disqus
        newarticle.disqus_id = "node/"
        id = html.partition("/node/")[2][0:4]
        digits = [str(x) for x in list(range(10))]
        if id[0] in digits:
            newarticle.disqus_id = newarticle.disqus_id + id[0]
        if id[1] in digits:
            newarticle.disqus_id = newarticle.disqus_id + id[1]
        if id[2] in digits:
            newarticle.disqus_id = newarticle.disqus_id + id[2]
        if id[3] in digits:
            newarticle.disqus_id = newarticle.disqus_id + id[3]


        newarticle.include_in_rss = False
        newarticle.exclude_from_list_views = True

        # Saving
        try:
            article_to_replace = Article.objects.get(slug=newarticle.slug)
        except Article.DoesNotExist:
            print("Saving as new article")
            newarticle.save()
        else:
            print("Updating existing article")
            newarticle.pk = article_to_replace.pk
            newarticle.created = article_to_replace.created
            newarticle.save()
    return

Example #6

Show file

def process(start, finish, html_file):
    soup = BeautifulSoup(open(html_file), "lxml")

    articles = soup.find_all("div")
    # for article in articles:
    #     url = article.find("a")["href"]
    #     title = article.find("p", attrs={'class' : 'title'}).get_text()
    #     article_date = article.find("p", attrs={'class' : 'date'}).get_text()
    #     title = title.replace('\n', ' ').replace('\r', '')
    #     article_date = article_date.replace('\n', ' ').replace('\r', '')
    #     print( (url + " @ " + title + " | " + article_date).strip() )
    # return

    with open(html_file) as f:
        articles = f.readlines()
    print("Number of articles", len(articles))
    if finish > len(articles):
        print("Truncating finish to ", len(articles))
        finish = len(articles)
    if start > finish:
        print("Start > finish")
        return

    articles = articles[start:finish]

    for idx, line in enumerate(articles):
        link = line.partition(" @ ")[0]
        title = line.partition(" @ ")[2].partition(" | ")[0].strip()
        if len(title) > 200:
            title = title[0:200]
        date_time = line.partition(" | ")[2].strip() + " +0200"
        slug = slugify(link.rpartition("/")[2])
        new_link = "http://groundup.org.za/article/" + slug
        old_link = "http://gutest.nathangeffen.webfactional.com" + link

        html = ""

        newarticle = Article()
        newarticle.title = title
        newarticle.slug = slug
        import datetime
        newarticle.published = datetime.datetime.strptime(
            date_time, "%d/%m/%Y - %H:%M %z")
        html = ""
        try:
            with urllib.request.urlopen(old_link) as response:
                html = response.read()
        except:
            print("CRITICAL: Can't find old link", old_link)
            continue
        html = str(html)
        pos = html.find(
            '<div class="field field-name-field-where field-type-text field-label-above"><div class="field-label">Where is the event:&nbsp;</div>'
        )

        if pos == -1:
            print("PROCESSING:", str(idx + start) + " " + link)
        else:
            print("IGNORING:", link)
            continue

        soup = BeautifulSoup(html, "lxml")

        # Byline
        try:
            newarticle.byline = soup.find \
            ("div", { "class" : "article-author" }).get_text()
        except:
            newarticle.byline = ""
            print("Byline not found")

        # Intro
        intro = ""
        try:
            intro = soup.find("div", {"class": "article-intro"})
            intro = intro.find("p")
            intro["class"] = "intro"
            newarticle.summary_text = str(intro)
        except:
            intro = ""
            print("Intro not found")

        primary_image = ""
        try:
            primary_image = soup.find("div", {"class" : "article-image"}). \
                            find("img")
            link = primary_image["src"]
            link = link.replace("http://gutest.nathangeffen.webfactional.com",
                                "http://groundup.org.za")
            newarticle.external_primary_image = link.replace(
                "/column_width/", "/article_image/")
            newarticle.primary_image_size = "LEAVE"
            primary_image_found = True
        except:
            print("Primary image not found")
            primary_image_found = False

        try:
            newarticle.primary_image_caption = primary_image["alt"]
        except:
            try:
                newarticle.primary_image_caption = str(soup.find("div", \
                    {"class": "field-name-field-image-description"})).\
                    replace("\\n","").replace("\\'","'")
            except:
                newarticle.primary_image_caption = ""
                print("Primary image caption not found")

        if newarticle.summary_text == "" and primary_image_found == False:
            intro = newarticle.primary_image_caption
            newarticle.primary_image_caption = ""
            print("Swapping caption and intro")

        try:
            text = soup.find("div", {"class":"article-body"}). \
                   find("div", {"class":"field-item"})
        except:
            try:
                text = soup.find("div", {"class":"content"}). \
                       find("div", {"class":"field-item"})
            except:
                print("No text")
                text = ""

        if str(intro) != "None":
            body = str(intro) + str(
                text)  # "".join([str(item) for item in text])
        else:
            body = str(text)

        newarticle.body = body.replace("\\n", " ")

        # category

        try:
            category = soup.find("div", attrs={
                'class': 'article-category'
            }).get_text()

            if category.lower() == "news":
                newarticle.category = "news"
            elif category.lower() == "featured story":
                newarticle.category = "featured story"
            elif category.lower() == "photo essay":
                newarticle.category = "photo essay"
            elif category.lower() == "photo story":
                newarticle.category = "photo essay"
            elif category.lower() == "photo":
                newarticle.category = "photo"
            elif category.lower() == "opinion":
                newarticle.category = "opinion"
            elif category.lower() == "brief":
                newarticle.category = "brief"
            elif category.lower() == "analysis":
                newarticle.category = "analysis"
            else:
                print("Unknown category: ", category)
        except:
            print("No category")

        # # Topics
        # try:
        #     topics = soup.find("div", {"class":"article-subject"}). \
        #              get_text(", ")
        # except:
        #     print("No topics")
        #     topics = ""
        # try:
        #     tags = soup.find("div", {"class":"article-tags"}).get_text(", ")
        # except:
        #     tags = ""

        # if tags:
        #     if topics:
        #         topics += ", " + tags
        #     else:
        #         topics = tags

        # topics_split = topics.split(",")
        # if len(topics_split) > 8:
        #     topics = ", ".join(topics_split[0:8])
        # newarticle.topics = topics

        # Disqus
        newarticle.disqus_id = "node/"
        id = html.partition("/node/")[2][0:4]
        digits = [str(x) for x in list(range(10))]
        if id[0] in digits:
            newarticle.disqus_id = newarticle.disqus_id + id[0]
        if id[1] in digits:
            newarticle.disqus_id = newarticle.disqus_id + id[1]
        if id[2] in digits:
            newarticle.disqus_id = newarticle.disqus_id + id[2]
        if id[3] in digits:
            newarticle.disqus_id = newarticle.disqus_id + id[3]

        newarticle.include_in_rss = False
        newarticle.exclude_from_list_views = True

        # Saving
        try:
            article_to_replace = Article.objects.get(slug=newarticle.slug)
        except Article.DoesNotExist:
            print("Saving as new article")
            newarticle.save()
        else:
            print("Updating existing article")
            newarticle.pk = article_to_replace.pk
            newarticle.created = article_to_replace.created
            newarticle.save()
    return