def process(start, finish, html_file):
    """Import articles listed in *html_file* into the Article model.

    Parses the saved HTML index, fetches each listed article page over
    HTTP, extracts its metadata and body, and creates a new Article or
    updates the existing one with the same slug.

    Parameters:
        start, finish: slice bounds into the list of <div> entries found
            in the index (finish is clamped to the entry count; nothing
            happens when start > finish).
        html_file: path of the saved index page to parse.
    """
    # Hoisted out of the per-article loop: stdlib modules used below.
    import datetime
    import urllib.request

    # Close the index file deterministically instead of leaking the handle.
    with open(html_file) as fp:
        soup = BeautifulSoup(fp, "lxml")
    articles = soup.find_all("div")
    print("Number of articles", len(articles))
    if finish > len(articles):
        print("Truncating finish to ", len(articles))
        finish = len(articles)
    if start > finish:
        print("Start > finish")
        return

    # Accepted category labels; anything else is reported and skipped.
    known_categories = {"news", "featured story", "photo essay", "photo",
                        "opinion", "brief", "analysis"}

    articles = articles[start:finish]
    for idx, article in enumerate(articles):
        # Title, link and date published come from the index entry.
        link = article.find("a")["href"]
        link = 'http://gutest.nathangeffen.webfactional.com' + str(link)
        if link.find("/gallery/") == -1:
            print("Processing: ", idx + start, link)
        else:
            print("Ignoring: ", idx + start, link)
            continue

        newarticle = Article()
        newarticle.title = article.find(
            "p", attrs={'class': 'title'}).get_text() \
            .replace("’", "'").replace("“", '"').replace("â€", '"')
        newarticle.slug = slugify(str(link).rpartition("/")[2])
        # Index dates appear to be SAST; appending the fixed +0200 offset
        # lets strptime produce a timezone-aware datetime.
        published = article.find('p', attrs={'class': 'date'}).get_text()[2:] \
                    + " +0200"
        newarticle.published = datetime.datetime.strptime(
            published, "%d/%m/%Y - %H:%M %z")

        # Fetch the full article page from the web.
        with urllib.request.urlopen(link) as response:
            html = response.read()
        soup = BeautifulSoup(html, "lxml")

        # Byline.  Only the "element missing" failure (AttributeError from
        # .get_text() on None) is expected; anything else should surface
        # instead of being swallowed by a bare except.
        try:
            newarticle.byline = soup.find(
                "div", {"class": "article-author"}).get_text()
        except AttributeError:
            print("Byline not found")

        # Intro.  Reset per iteration so a missing intro on this page can
        # never reuse a stale value from the previous article.
        intro = None
        try:
            intro = soup.find("div", {"class": "article-intro"}).find("p")
            intro["class"] = "intro"
            newarticle.summary_text = str(intro)
        except (AttributeError, TypeError):
            intro = None
            print("Intro not found")

        # Primary image.  Previously a failed lookup left primary_image
        # bound to the previous article's image, silently attaching the
        # wrong caption — reset it every pass.
        primary_image = None
        try:
            primary_image = soup.find(
                "div", {"class": "article-image"}).find("img")
            image_link = primary_image["src"].replace(
                "http://gutest.nathangeffen.webfactional.com",
                "http://groundup.org.za")
            newarticle.external_primary_image = image_link.replace(
                "/column_width/", "/article_image/")
            newarticle.primary_image_size = "LEAVE"
        except (AttributeError, TypeError, KeyError):
            print("Primary image not found")

        try:
            newarticle.primary_image_caption = primary_image["alt"]
        except (TypeError, KeyError):
            print("Primary image caption not found")

        # Body: intro paragraph (if any) followed by the article text.
        try:
            text = soup.find("div", {"class": "article-body"}).find(
                "div", {"class": "field-item"})
        except AttributeError:
            print("No text")
            text = None
        if text is None:
            text = ""
        # str(None) would inject the literal word "None" into the body,
        # so substitute an empty string when the intro is missing.
        newarticle.body = ("" if intro is None else str(intro)) + str(text)

        # Category: accept only the known labels (replaces a 7-branch
        # if/elif chain that mapped each label to itself).
        try:
            category = soup.find(
                "div", attrs={'class': 'article-category'}).get_text()
            if category.lower() in known_categories:
                newarticle.category = category.lower()
            else:
                print("Unknown category: ", category)
        except AttributeError:
            print("No category")

        # Topics: subjects plus tags, capped at eight comma-separated entries.
        try:
            topics = soup.find(
                "div", {"class": "article-subject"}).get_text(", ")
        except AttributeError:
            print("No topics")
            topics = ""
        try:
            tags = soup.find("div", {"class": "article-tags"}).get_text(", ")
        except AttributeError:
            tags = ""

        if tags:
            topics = topics + ", " + tags if topics else tags

        topics_split = topics.split(",")
        if len(topics_split) > 8:
            topics = ", ".join(topics_split[0:8])
        newarticle.topics = topics

        # Save: reuse the primary key (and creation date) when an article
        # with this slug already exists, otherwise insert a new row.
        try:
            article_to_replace = Article.objects.get(slug=newarticle.slug)
        except Article.DoesNotExist:
            print("Saving as new article")
        else:
            print("Updating existing article")
            newarticle.pk = article_to_replace.pk
            newarticle.created = article_to_replace.created
        newarticle.save()
# Example #2 (second scraped copy of the same function; original page
# showed a vote count of 0 here)
def _div_text(soup, class_name, separator=None):
    """Return the text of the first <div class=class_name>, or None when absent."""
    node = soup.find("div", {"class": class_name})
    if node is None:
        return None
    return node.get_text(separator) if separator is not None else node.get_text()


def _collect_topics(soup):
    """Return comma-joined subjects plus tags, truncated to eight entries."""
    topics = _div_text(soup, "article-subject", ", ")
    if topics is None:
        print("No topics")
        topics = ""
    tags = _div_text(soup, "article-tags", ", ") or ""
    if tags:
        topics = topics + ", " + tags if topics else tags
    parts = topics.split(",")
    if len(parts) > 8:
        topics = ", ".join(parts[:8])
    return topics


def _save_article(newarticle):
    """Insert *newarticle*, or update in place when its slug already exists."""
    try:
        existing = Article.objects.get(slug=newarticle.slug)
    except Article.DoesNotExist:
        print("Saving as new article")
    else:
        print("Updating existing article")
        # Reusing the pk turns save() into an UPDATE of the old row.
        newarticle.pk = existing.pk
        newarticle.created = existing.created
    newarticle.save()


def process(start, finish, html_file):
    """Migrate index entries [start:finish) from *html_file* into Article rows.

    Each entry in the saved index is fetched from the live site, its
    fields are scraped, and an Article is created (or updated when one
    with the same slug already exists).

    Parameters:
        start, finish: slice bounds into the <div> entries of the index;
            finish is clamped to the entry count, and nothing happens
            when start > finish.
        html_file: path of the saved index page to parse.
    """
    # Hoisted out of the per-article loop: stdlib modules used below.
    import datetime
    import urllib.request

    # Close the index file deterministically instead of leaking the handle.
    with open(html_file) as fp:
        soup = BeautifulSoup(fp, "lxml")
    articles = soup.find_all("div")
    print("Number of articles", len(articles))
    if finish > len(articles):
        print("Truncating finish to ", len(articles))
        finish = len(articles)
    if start > finish:
        print("Start > finish")
        return

    # Accepted category labels; anything else is reported and skipped.
    categories = {"news", "featured story", "photo essay", "photo",
                  "opinion", "brief", "analysis"}

    for idx, article in enumerate(articles[start:finish]):
        # Title, link and date published come from the index entry.
        link = article.find("a")["href"]
        link = 'http://gutest.nathangeffen.webfactional.com' + str(link)
        if link.find("/gallery/") == -1:
            print("Processing: ", idx + start, link)
        else:
            print("Ignoring: ", idx + start, link)
            continue

        newarticle = Article()
        newarticle.title = (article.find("p", attrs={'class': 'title'})
                            .get_text()
                            .replace("’", "'")
                            .replace("“", '"')
                            .replace("â€", '"'))
        newarticle.slug = slugify(str(link).rpartition("/")[2])
        # Index dates appear to be SAST; appending the fixed +0200 offset
        # lets strptime produce a timezone-aware datetime.
        published = article.find('p', attrs={'class': 'date'}).get_text()[2:] \
                    + " +0200"
        newarticle.published = datetime.datetime.strptime(
            published, "%d/%m/%Y - %H:%M %z")

        # Fetch the full article page from the web.
        with urllib.request.urlopen(link) as response:
            html = response.read()
        soup = BeautifulSoup(html, "lxml")

        # Byline (previously hidden behind a bare except).
        byline = _div_text(soup, "article-author")
        if byline is None:
            print("Byline not found")
        else:
            newarticle.byline = byline

        # Intro paragraph; None when missing (reset every pass so stale
        # values can never carry over between articles).
        intro = None
        intro_div = soup.find("div", {"class": "article-intro"})
        if intro_div is not None:
            intro = intro_div.find("p")
        if intro is not None:
            intro["class"] = "intro"
            newarticle.summary_text = str(intro)
        else:
            print("Intro not found")

        # Primary image; reset each pass so a page without an image cannot
        # inherit the previous article's image/caption (old stale-state bug).
        primary_image = None
        image_div = soup.find("div", {"class": "article-image"})
        if image_div is not None:
            primary_image = image_div.find("img")
        if primary_image is not None and primary_image.get("src"):
            image_link = primary_image["src"].replace(
                "http://gutest.nathangeffen.webfactional.com",
                "http://groundup.org.za")
            newarticle.external_primary_image = image_link.replace(
                "/column_width/", "/article_image/")
            newarticle.primary_image_size = "LEAVE"
        else:
            print("Primary image not found")
        if primary_image is not None and primary_image.get("alt") is not None:
            newarticle.primary_image_caption = primary_image["alt"]
        else:
            print("Primary image caption not found")

        # Body: intro paragraph (if any) followed by the article text.
        body_div = soup.find("div", {"class": "article-body"})
        text = body_div.find("div", {"class": "field-item"}) if body_div else None
        if text is None:
            print("No text")
            text = ""
        # Guard against the literal string "None" leaking into the body.
        newarticle.body = ("" if intro is None else str(intro)) + str(text)

        # Category: accept only the known labels (replaces a 7-branch
        # if/elif chain that mapped each label to itself).
        category = _div_text(soup, "article-category")
        if category is None:
            print("No category")
        elif category.lower() in categories:
            newarticle.category = category.lower()
        else:
            print("Unknown category: ", category)

        newarticle.topics = _collect_topics(soup)
        _save_article(newarticle)