Example #1
0
    def setUpTestData(cls):
        """Build the shared fixtures for the tweet tests.

        Creates a "News" category, one article that is published
        immediately, a Twitter handle, and a scheduled tweet for that
        article which tags the handle.
        """
        cls.client = Client()

        # The article below looks its category up by name, so save it first.
        news = Category()
        news.name = "News"
        news.slug = "news"
        news.save()

        article = Article()
        article.title = "Test article 1"
        article.slug = "test-article-1"
        article.category = Category.objects.get(name="News")
        article.save()
        article.publish_now()

        handle = TwitterHandle()
        handle.name = "groundup_news"
        handle.slug = "groundup_news"
        handle.save()

        tweet = Tweet()
        tweet.article = article
        tweet.wait_time = 0
        tweet.status = "scheduled"
        # The text embeds the article's publication timestamp.
        tweet.tweet_text = "Test tweet " + str(article.published)
        # Persist the tweet, attach the handle, then persist once more.
        tweet.save()
        tweet.tag_accounts.add(handle)
        tweet.save()
Example #2
0
    def setUp(self):
        """Create the per-test fixtures.

        Saves a "News" category, an article published immediately, a
        Twitter handle, and a scheduled tweet for the article that tags
        the handle.
        """
        news_category = Category()
        news_category.name = "News"
        news_category.slug = "news"
        news_category.save()

        # The category is fetched back by name rather than reused directly.
        test_article = Article()
        test_article.title = "Test article 1"
        test_article.slug = "test-article-1"
        test_article.category = Category.objects.get(name="News")
        test_article.save()
        test_article.publish_now()

        twitter_handle = TwitterHandle()
        twitter_handle.name = "groundup_news"
        twitter_handle.slug = "groundup_news"
        twitter_handle.save()

        scheduled_tweet = Tweet()
        scheduled_tweet.article = test_article
        scheduled_tweet.wait_time = 0
        scheduled_tweet.status = "scheduled"
        scheduled_tweet.tweet_text = (
            "Test tweet " + str(test_article.published))
        # Save, tag the handle, and save again.
        scheduled_tweet.save()
        scheduled_tweet.tag_accounts.add(twitter_handle)
        scheduled_tweet.save()
Example #3
0
 def test_duplicate_save(self):
     """Saving an article with an already-used slug raises IntegrityError.

     The test fails if ``save()`` completes without raising; any other
     exception type propagates unchanged.
     """
     duplicate = Article()
     duplicate.title = "Test article 3"
     duplicate.category = Category.objects.get(name__iexact="news")
     # Presumably collides with the slug of an article created by the
     # fixture -- confirm against the enclosing test class.
     duplicate.slug = "test-article-1"
     with self.assertRaises(IntegrityError):
         duplicate.save()
Example #4
0
 def test_duplicate_save(self):
     """Check that a second article with a taken slug cannot be saved.

     ``save()`` is expected to raise ``IntegrityError``; the test fails
     when it returns normally, and other exceptions propagate unchanged.
     """
     clashing_article = Article()
     clashing_article.title = "Test article 3"
     clashing_article.category = Category.objects.get(name__iexact="news")
     # Assumed to duplicate the slug of a fixture article -- verify
     # against the enclosing test class.
     clashing_article.slug = "test-article-1"
     with self.assertRaises(IntegrityError):
         clashing_article.save()
Example #5
0
    def setUpTestData(cls):
        """Build the shared fixtures for the republisher tests.

        Saves one republisher, one topic, five categories, one article
        that is published immediately, and the ``RepublisherArticle``
        row linking that article to the republisher.
        """
        nyt = Republisher()
        nyt.name = "NYT"
        nyt.email_addresses = "[email protected],[email protected]"
        nyt.message = "Hi. Please republish this."
        nyt.save()

        government = Topic()
        government.name = "government"
        government.slug = "government"
        government.save()

        # Categories are saved in this order; "News" is looked up below.
        category_rows = (
            ("Feature", "feature"),
            ("Photo essay", "photo-essay"),
            ("Opinion", "opinion"),
            ("Photo", "photo"),
            ("News", "news"),
        )
        for category_name, category_slug in category_rows:
            category = Category()
            category.name = category_name
            category.slug = category_slug
            category.save()

        story = Article()
        story.title = "Test article 1"
        story.body = "<p>The quick brown fox jumps over the lazy dog.</p>"
        story.slug = "test-article-1"
        story.category = Category.objects.get(name="News")
        story.external_primary_image = (
            "http://www.w3schools.com/html/pic_mountain.jpg")
        story.save()
        story.publish_now()

        republication = RepublisherArticle()
        republication.article = story
        republication.republisher = nyt
        republication.save()
def process(start, finish, html_file):
    """Import articles listed in a local HTML index into ``Article`` rows.

    Every ``<div>`` of *html_file* in the half-open range
    ``[start, finish)`` is treated as one article entry. The entry's page
    is fetched from the old site and scraped for byline, intro, primary
    image, body, category and topics. The article is then saved, reusing
    the primary key and creation time of any existing article with the
    same slug. Entries whose link contains ``/gallery/`` are skipped.

    Args:
        start: Index of the first entry to process.
        finish: Index one past the last entry; clamped to the number of
            entries found.
        html_file: Path of the HTML index file.
    """
    import datetime
    import urllib.request

    known_categories = ("news", "featured story", "photo essay", "photo",
                        "opinion", "brief", "analysis")

    # Context manager so the index file handle is closed, not leaked.
    with open(html_file) as index_file:
        soup = BeautifulSoup(index_file, "lxml")
    articles = soup.find_all("div")
    print("Number of articles", len(articles))
    if finish > len(articles):
        print("Truncating finish to ", len(articles))
        finish = len(articles)
    if start > finish:
        print("Start > finish")
        return

    for idx, article in enumerate(articles[start:finish]):
        # Title, link and date published
        link = article.find("a")["href"]
        link = 'http://gutest.nathangeffen.webfactional.com' + str(link)
        if "/gallery/" in link:
            print("Ignoring: ", idx + start, link)
            continue
        print("Processing: ", idx + start, link)

        newarticle = Article()
        title = article.find("p", attrs={'class': 'title'}).get_text()
        newarticle.title = title.replace("’", "'").replace("“", '"'). \
            replace("â€", '"')
        newarticle.slug = slugify(str(link).rpartition("/")[2])
        # The first two characters of the date text are dropped and a
        # fixed +0200 offset is appended before parsing.
        published = article.find('p', attrs={'class': 'date'}). \
            get_text()[2:] + " +0200"
        newarticle.published = datetime.datetime.strptime(
            published, "%d/%m/%Y - %H:%M %z")

        # Fetch article from web
        with urllib.request.urlopen(link) as response:
            html = response.read()
        soup = BeautifulSoup(html, "lxml")

        # Byline (best effort: a missing element leaves the field unset)
        try:
            newarticle.byline = soup.find(
                "div", {"class": "article-author"}).get_text()
        except Exception:
            print("Byline not found")

        # Intro; stays None when the element is missing.
        intro = None
        try:
            intro = soup.find("div", {"class": "article-intro"}).find("p")
            intro["class"] = "intro"
            newarticle.summary_text = str(intro)
        except Exception:
            print("Intro not found")

        # Reset per article so an earlier article's image (and therefore
        # its caption) can never leak into this one.
        primary_image = None
        try:
            primary_image = soup.find(
                "div", {"class": "article-image"}).find("img")
            image_link = primary_image["src"].replace(
                "http://gutest.nathangeffen.webfactional.com",
                "http://groundup.org.za")
            newarticle.external_primary_image = image_link.replace(
                "/column_width/", "/article_image/")
            newarticle.primary_image_size = "LEAVE"
        except Exception:
            print("Primary image not found")

        try:
            newarticle.primary_image_caption = primary_image["alt"]
        except Exception:
            print("Primary image caption not found")

        # Body
        try:
            text = soup.find("div", {"class": "article-body"}). \
                find("div", {"class": "field-item"})
        except Exception:
            print("No text")
            text = ""

        # Skip missing parts instead of storing the literal text "None".
        newarticle.body = "".join(
            str(part) for part in (intro, text) if part is not None)

        # Category: only recognised names are stored, lower-cased.
        try:
            category = soup.find(
                "div", attrs={'class': 'article-category'}).get_text()
            if category.lower() in known_categories:
                newarticle.category = category.lower()
            else:
                print("Unknown category: ", category)
        except Exception:
            print("No category")

        # Topics: subjects and tags merged into one comma-separated string.
        try:
            topics = soup.find("div", {"class": "article-subject"}). \
                get_text(", ")
        except Exception:
            print("No topics")
            topics = ""
        try:
            tags = soup.find("div", {"class": "article-tags"}).get_text(", ")
        except Exception:
            tags = ""

        if tags:
            if topics:
                topics += ", " + tags
            else:
                topics = tags

        # Keep at most the first eight comma-separated entries.
        topics_split = topics.split(",")
        if len(topics_split) > 8:
            topics = ", ".join(topics_split[0:8])
        newarticle.topics = topics

        # Saving
        try:
            article_to_replace = Article.objects.get(slug=newarticle.slug)
        except Article.DoesNotExist:
            print("Saving as new article")
            newarticle.save()
        else:
            print("Updating existing article")
            newarticle.pk = article_to_replace.pk
            newarticle.created = article_to_replace.created
            newarticle.save()
Example #7
0
def process(start, finish, html_file):
    """Import articles listed in a local HTML index into ``Article`` rows.

    Every ``<div>`` of *html_file* in the half-open range
    ``[start, finish)`` is treated as one article entry. The entry's page
    is fetched from the old site and scraped for byline, intro, primary
    image, body, category and topics. The article is then saved, reusing
    the primary key and creation time of any existing article with the
    same slug. Entries whose link contains ``/gallery/`` are skipped.

    Args:
        start: Index of the first entry to process.
        finish: Index one past the last entry; clamped to the number of
            entries found.
        html_file: Path of the HTML index file.
    """
    import datetime
    import urllib.request

    known_categories = ("news", "featured story", "photo essay", "photo",
                        "opinion", "brief", "analysis")

    # Context manager so the index file handle is closed, not leaked.
    with open(html_file) as index_file:
        soup = BeautifulSoup(index_file, "lxml")
    articles = soup.find_all("div")
    print("Number of articles", len(articles))
    if finish > len(articles):
        print("Truncating finish to ", len(articles))
        finish = len(articles)
    if start > finish:
        print("Start > finish")
        return

    for idx, article in enumerate(articles[start:finish]):
        # Title, link and date published
        link = article.find("a")["href"]
        link = 'http://gutest.nathangeffen.webfactional.com' + str(link)
        if "/gallery/" in link:
            print("Ignoring: ", idx + start, link)
            continue
        print("Processing: ", idx + start, link)

        newarticle = Article()
        title = article.find("p", attrs={'class': 'title'}).get_text()
        newarticle.title = title.replace("’", "'").replace("“", '"'). \
            replace("â€", '"')
        newarticle.slug = slugify(str(link).rpartition("/")[2])
        # The first two characters of the date text are dropped and a
        # fixed +0200 offset is appended before parsing.
        published = article.find('p', attrs={'class': 'date'}). \
            get_text()[2:] + " +0200"
        newarticle.published = datetime.datetime.strptime(
            published, "%d/%m/%Y - %H:%M %z")

        # Fetch article from web
        with urllib.request.urlopen(link) as response:
            html = response.read()
        soup = BeautifulSoup(html, "lxml")

        # Byline (best effort: a missing element leaves the field unset)
        try:
            newarticle.byline = soup.find(
                "div", {"class": "article-author"}).get_text()
        except Exception:
            print("Byline not found")

        # Intro; stays None when the element is missing.
        intro = None
        try:
            intro = soup.find("div", {"class": "article-intro"}).find("p")
            intro["class"] = "intro"
            newarticle.summary_text = str(intro)
        except Exception:
            print("Intro not found")

        # Reset per article so an earlier article's image (and therefore
        # its caption) can never leak into this one.
        primary_image = None
        try:
            primary_image = soup.find(
                "div", {"class": "article-image"}).find("img")
            image_link = primary_image["src"].replace(
                "http://gutest.nathangeffen.webfactional.com",
                "http://groundup.org.za")
            newarticle.external_primary_image = image_link.replace(
                "/column_width/", "/article_image/")
            newarticle.primary_image_size = "LEAVE"
        except Exception:
            print("Primary image not found")

        try:
            newarticle.primary_image_caption = primary_image["alt"]
        except Exception:
            print("Primary image caption not found")

        # Body
        try:
            text = soup.find("div", {"class": "article-body"}). \
                find("div", {"class": "field-item"})
        except Exception:
            print("No text")
            text = ""

        # Skip missing parts instead of storing the literal text "None".
        newarticle.body = "".join(
            str(part) for part in (intro, text) if part is not None)

        # Category: only recognised names are stored, lower-cased.
        try:
            category = soup.find(
                "div", attrs={'class': 'article-category'}).get_text()
            if category.lower() in known_categories:
                newarticle.category = category.lower()
            else:
                print("Unknown category: ", category)
        except Exception:
            print("No category")

        # Topics: subjects and tags merged into one comma-separated string.
        try:
            topics = soup.find("div", {"class": "article-subject"}). \
                get_text(", ")
        except Exception:
            print("No topics")
            topics = ""
        try:
            tags = soup.find("div", {"class": "article-tags"}).get_text(", ")
        except Exception:
            tags = ""

        if tags:
            if topics:
                topics += ", " + tags
            else:
                topics = tags

        # Keep at most the first eight comma-separated entries.
        topics_split = topics.split(",")
        if len(topics_split) > 8:
            topics = ", ".join(topics_split[0:8])
        newarticle.topics = topics

        # Saving
        try:
            article_to_replace = Article.objects.get(slug=newarticle.slug)
        except Article.DoesNotExist:
            print("Saving as new article")
            newarticle.save()
        else:
            print("Updating existing article")
            newarticle.pk = article_to_replace.pk
            newarticle.created = article_to_replace.created
            newarticle.save()
Example #8
0
 def test_published(self):
     """Only articles whose publication time has arrived are counted.

     An article dated now raises the published count by one; an article
     dated ten hours ahead leaves the count unchanged and reports itself
     as not published.
     """
     baseline = Article.objects.published().count()

     # Published right now: expected to be counted.
     current = Article()
     current.title = "Test article 3"
     current.slug = "test-article-3"
     current.category = Category.objects.get(name="News")
     current.published = timezone.now()
     current.save()
     self.assertEqual(baseline + 1, Article.objects.published().count())

     # Dated ten hours in the future: expected not to be counted.
     upcoming = Article()
     upcoming.title = "Test article 4"
     upcoming.category = Category.objects.get(name="News")
     upcoming.slug = "test-article-4"
     upcoming.published = timezone.now() + datetime.timedelta(hours=10)
     upcoming.save()
     self.assertEqual(baseline + 1, Article.objects.published().count())
     self.assertEqual(upcoming.is_published(), False)
Example #9
0
    def setUpTestData(cls):
        """Build the shared fixtures for the commission tests.

        Saves one fund, the "Video" and "News" categories, three authors
        with freelancer codes "f", "c" and "n", and eleven articles
        (numbered 1-10 and 20) dated now, with differing author slots.
        """
        cls.client = Client()

        fund = Fund()
        fund.name = "Bertha|Reporters"
        fund.save()

        for category_name, category_slug in (("Video", "video"),
                                             ("News", "news")):
            category = Category()
            category.name = category_name
            category.slug = category_slug
            category.save()

        saved_authors = []
        for first_names, last_name, freelancer in (
                ("Joe", "Bloggs", "f"),
                ("Jane", "Doe", "c"),
                ("Lois", "Lane", "n")):
            author = Author()
            author.first_names = first_names
            author.last_name = last_name
            author.email = "*****@*****.**"
            author.freelancer = freelancer
            author.save()
            saved_authors.append(author)
        joe, jane, lois = saved_authors

        # (article number, category name, author slots in assignment order)
        article_rows = (
            (1, "News", (("author_01", joe), ("author_02", jane),
                         ("author_03", lois))),
            (2, "News", (("author_01", joe),)),
            (3, "News", (("author_02", jane),)),
            (4, "News", (("author_01", joe), ("author_02", jane))),
            (5, "News", (("author_01", joe), ("author_02", jane))),
            (6, "News", (("author_01", joe), ("author_02", jane))),
            (7, "News", (("author_01", jane), ("author_02", joe))),
            (8, "News", (("author_01", jane), ("author_02", joe))),
            (9, "News", (("author_01", jane), ("author_02", joe))),
            (10, "News", (("author_01", jane),)),
            (20, "Video", (("author_01", jane),)),
        )
        for number, category_name, author_slots in article_rows:
            article = Article()
            article.title = "Test commission %d" % number
            article.slug = "test-commission-%d" % number
            article.category = Category.objects.get(name=category_name)
            article.published = timezone.now()
            # Only the listed slots are assigned; others keep their default.
            for slot, author in author_slots:
                setattr(article, slot, author)
            article.save()
Example #10
0
    def setUpTestData(cls):
        """Build the shared fixtures for the commission tests.

        Saves one fund, a "News" category, three authors (two with
        ``freelancer`` True, one False), and four articles dated now
        with differing author slots.
        """
        cls.client = Client()

        fund = Fund()
        fund.name = "Bertha|Reporters"
        fund.save()

        news = Category()
        news.name = "News"
        news.slug = "news"
        news.save()

        saved_authors = []
        for first_names, last_name, is_freelancer in (
                ("Joe", "Bloggs", True),
                ("Jane", "Doe", True),
                ("Lois", "Lane", False)):
            author = Author()
            author.first_names = first_names
            author.last_name = last_name
            author.email = "*****@*****.**"
            author.freelancer = is_freelancer
            author.save()
            saved_authors.append(author)
        joe, jane, lois = saved_authors

        # (article number, author slots in assignment order)
        article_rows = (
            (1, (("author_01", joe), ("author_02", jane),
                 ("author_03", lois))),
            (2, (("author_01", joe),)),
            (3, (("author_01", joe),)),
            (4, (("author_01", joe), ("author_02", jane))),
        )
        for number, author_slots in article_rows:
            article = Article()
            article.title = "Test commission %d" % number
            article.slug = "test-commission-%d" % number
            article.category = Category.objects.get(name="News")
            article.published = timezone.now()
            # Only the listed slots are assigned; others keep their default.
            for slot, author in author_slots:
                setattr(article, slot, author)
            article.save()
Example #11
0
def process(start, finish, html_file):
    """Import articles listed line-by-line in *html_file* as ``Article`` rows.

    Each line is parsed as ``<path> @ <title> | <date>``. For every line
    in the half-open range ``[start, finish)`` the page is fetched from
    the old site and scraped for byline, intro, primary image, caption,
    body, category and Disqus id. The article is then saved, reusing the
    primary key and creation time of any existing article with the same
    slug. Pages that cannot be fetched, or that contain the
    "Where is the event" field, are skipped.

    Topics are not imported by this version.

    Args:
        start: Index of the first line to process.
        finish: Index one past the last line; clamped to the line count.
        html_file: Path of the text file listing the articles.
    """
    import datetime

    # Scraped category name (lower-cased) -> value stored on the article.
    category_map = {
        "news": "news",
        "featured story": "featured story",
        "photo essay": "photo essay",
        "photo story": "photo essay",
        "photo": "photo",
        "opinion": "opinion",
        "brief": "brief",
        "analysis": "analysis",
    }
    event_marker = '<div class="field field-name-field-where field-type-text field-label-above"><div class="field-label">Where is the event:&nbsp;</div>'

    with open(html_file) as f:
        articles = f.readlines()
    print("Number of articles", len(articles))
    if finish > len(articles):
        print("Truncating finish to ", len(articles))
        finish = len(articles)
    if start > finish:
        print("Start > finish")
        return

    for idx, line in enumerate(articles[start:finish]):
        link = line.partition(" @ ")[0]
        # Titles are capped at 200 characters.
        title = line.partition(" @ ")[2].partition(" | ")[0].strip()[:200]
        date_time = line.partition(" | ")[2].strip() + " +0200"
        slug = slugify(link.rpartition("/")[2])
        old_link = "http://gutest.nathangeffen.webfactional.com" + link

        newarticle = Article()
        newarticle.title = title
        newarticle.slug = slug
        newarticle.published = datetime.datetime.strptime(
            date_time, "%d/%m/%Y - %H:%M %z")

        try:
            with urllib.request.urlopen(old_link) as response:
                html = response.read()
        except Exception:
            print("CRITICAL: Can't find old link", old_link)
            continue
        # NOTE(review): str() on the bytes payload yields its repr form
        # (b'...' with escaped newlines/quotes); the "\\n" and "\\'"
        # replacements below depend on that. Decoding properly would be
        # cleaner but changes the stored text -- confirm before changing.
        html = str(html)

        if event_marker in html:
            print("IGNORING:", link)
            continue
        print("PROCESSING:", str(idx + start) + " " + link)

        soup = BeautifulSoup(html, "lxml")

        # Byline
        try:
            newarticle.byline = soup.find(
                "div", {"class": "article-author"}).get_text()
        except Exception:
            newarticle.byline = ""
            print("Byline not found")

        # Intro
        intro = ""
        try:
            intro = soup.find("div", {"class": "article-intro"})
            intro = intro.find("p")
            intro["class"] = "intro"
            newarticle.summary_text = str(intro)
        except Exception:
            intro = ""
            print("Intro not found")

        # Primary image
        primary_image = ""
        try:
            primary_image = soup.find(
                "div", {"class": "article-image"}).find("img")
            image_link = primary_image["src"].replace(
                "http://gutest.nathangeffen.webfactional.com",
                "http://groundup.org.za")
            newarticle.external_primary_image = image_link.replace(
                "/column_width/", "/article_image/")
            newarticle.primary_image_size = "LEAVE"
            primary_image_found = True
        except Exception:
            print("Primary image not found")
            primary_image_found = False

        # Caption: the image's alt text, else the image-description field.
        try:
            newarticle.primary_image_caption = primary_image["alt"]
        except Exception:
            description = soup.find(
                "div", {"class": "field-name-field-image-description"})
            if description is None:
                # Avoid storing the literal text "None" as the caption.
                newarticle.primary_image_caption = ""
                print("Primary image caption not found")
            else:
                newarticle.primary_image_caption = str(description). \
                    replace("\\n", "").replace("\\'", "'")

        # With neither summary nor image, the caption is used as the intro.
        if newarticle.summary_text == "" and not primary_image_found:
            intro = newarticle.primary_image_caption
            newarticle.primary_image_caption = ""
            print("Swapping caption and intro")

        # Body text, falling back to the generic "content" container.
        try:
            text = soup.find("div", {"class": "article-body"}). \
                find("div", {"class": "field-item"})
        except Exception:
            try:
                text = soup.find("div", {"class": "content"}). \
                    find("div", {"class": "field-item"})
            except Exception:
                print("No text")
                text = ""
        if text is None:
            # Container found but no field-item: do not store "None".
            print("No text")
            text = ""

        if str(intro) != "None":
            body = str(intro) + str(text)
        else:
            body = str(text)

        newarticle.body = body.replace("\\n", " ")

        # Category
        try:
            category = soup.find(
                "div", attrs={'class': 'article-category'}).get_text()
            if category.lower() in category_map:
                newarticle.category = category_map[category.lower()]
            else:
                print("Unknown category: ", category)
        except Exception:
            print("No category")

        # Disqus: "node/" plus the digits found among the (up to) four
        # characters after the first "/node/". Joining the digits copes
        # with fewer than four characters instead of raising IndexError.
        node_id = html.partition("/node/")[2][0:4]
        newarticle.disqus_id = "node/" + "".join(
            ch for ch in node_id if ch in "0123456789")

        newarticle.include_in_rss = False
        newarticle.exclude_from_list_views = True

        # Saving
        try:
            article_to_replace = Article.objects.get(slug=newarticle.slug)
        except Article.DoesNotExist:
            print("Saving as new article")
            newarticle.save()
        else:
            print("Updating existing article")
            newarticle.pk = article_to_replace.pk
            newarticle.created = article_to_replace.created
            newarticle.save()
Example #12
0
# Maps the lower-cased category label scraped from the old site to the value
# stored on the article ("photo story" is folded into "photo essay").
_CATEGORY_BY_LABEL = {
    "news": "news",
    "featured story": "featured story",
    "photo essay": "photo essay",
    "photo story": "photo essay",
    "photo": "photo",
    "opinion": "opinion",
    "brief": "brief",
    "analysis": "analysis",
}


def _disqus_id_from_html(html):
    """Return the Disqus identifier ("node/" plus digits) found in *html*.

    Only the four characters following the first "/node/" are examined and
    every decimal digit among them is kept, in order. When "/node/" is absent
    or fewer than four characters follow it, the digits that are available
    (possibly none) are used instead of raising IndexError.
    """
    candidate = html.partition("/node/")[2][0:4]
    return "node/" + "".join(ch for ch in candidate if ch in "0123456789")


def process(start, finish, html_file):
    """Import a slice of legacy articles listed in *html_file*.

    Despite its name, *html_file* is read as plain text with one article per
    line in the form ``<path> @ <title> | <dd/mm/YYYY - HH:MM>``; times are
    interpreted with a +0200 offset. For every line in ``[start:finish]`` the
    old page is downloaded and scraped for byline, intro, primary image,
    caption, body, category and Disqus id, then saved as an ``Article``. An
    existing article with the same slug is overwritten (its primary key and
    creation date are kept). Pages containing the "Where is the event" field
    are skipped.

    Args:
        start: Index of the first line to process.
        finish: Index one past the last line to process; clamped to the
            number of lines in the file.
        html_file: Path of the listing file.

    Returns:
        None. Progress and problems are reported with ``print``.

    Raises:
        ValueError: If a line's date does not match the expected format.
    """
    import datetime

    with open(html_file) as f:
        articles = f.readlines()
    print("Number of articles", len(articles))
    if finish > len(articles):
        print("Truncating finish to ", len(articles))
        finish = len(articles)
    if start > finish:
        print("Start > finish")
        return

    event_marker = (
        '<div class="field field-name-field-where field-type-text field-label-above"><div class="field-label">Where is the event:&nbsp;</div>'
    )

    for idx, line in enumerate(articles[start:finish]):
        link = line.partition(" @ ")[0]
        # Titles are capped at 200 characters.
        title = line.partition(" @ ")[2].partition(" | ")[0].strip()[:200]
        date_time = line.partition(" | ")[2].strip() + " +0200"
        slug = slugify(link.rpartition("/")[2])
        old_link = "http://gutest.nathangeffen.webfactional.com" + link

        newarticle = Article()
        newarticle.title = title
        newarticle.slug = slug
        newarticle.published = datetime.datetime.strptime(
            date_time, "%d/%m/%Y - %H:%M %z")

        try:
            with urllib.request.urlopen(old_link) as response:
                html = response.read()
        except Exception:
            # Best effort: report and move on to the next article. Only
            # Exception is caught so Ctrl-C still stops the import.
            print("CRITICAL: Can't find old link", old_link)
            continue
        # NOTE(review): read() returns bytes, so str() produces the bytes
        # repr with escapes such as a literal backslash-n; the
        # replace("\\n", ...) calls below rely on that. Confirm before
        # switching to a proper decode().
        html = str(html)

        if event_marker in html:
            print("IGNORING:", link)
            continue
        print("PROCESSING:", str(idx + start) + " " + link)

        soup = BeautifulSoup(html, "lxml")

        # Byline
        try:
            newarticle.byline = soup.find(
                "div", {"class": "article-author"}).get_text()
        except Exception:
            newarticle.byline = ""
            print("Byline not found")

        # Intro
        intro = ""
        try:
            intro = soup.find("div", {"class": "article-intro"})
            intro = intro.find("p")
            intro["class"] = "intro"
            newarticle.summary_text = str(intro)
        except Exception:
            intro = ""
            print("Intro not found")

        # Primary image
        primary_image = ""
        try:
            primary_image = soup.find("div", {"class": "article-image"}). \
                            find("img")
            image_url = primary_image["src"]
            image_url = image_url.replace(
                "http://gutest.nathangeffen.webfactional.com",
                "http://groundup.org.za")
            newarticle.external_primary_image = image_url.replace(
                "/column_width/", "/article_image/")
            newarticle.primary_image_size = "LEAVE"
            primary_image_found = True
        except Exception:
            print("Primary image not found")
            primary_image_found = False

        # Caption: prefer the image's alt text, else the description block.
        try:
            newarticle.primary_image_caption = primary_image["alt"]
        except Exception:
            description = soup.find(
                "div", {"class": "field-name-field-image-description"})
            if description is None:
                # Avoid storing str(None), i.e. the literal text "None".
                newarticle.primary_image_caption = ""
                print("Primary image caption not found")
            else:
                newarticle.primary_image_caption = str(description). \
                    replace("\\n", "").replace("\\'", "'")

        if newarticle.summary_text == "" and not primary_image_found:
            intro = newarticle.primary_image_caption
            newarticle.primary_image_caption = ""
            print("Swapping caption and intro")

        # Body text: the first container that exists decides the result.
        text = None
        for container_class in ("article-body", "content"):
            container = soup.find("div", {"class": container_class})
            if container is not None:
                text = container.find("div", {"class": "field-item"})
                break
        if text is None:
            # Covers both "no container" and "container without field-item";
            # the latter used to leak the literal text "None" into the body.
            print("No text")
            text = ""

        # Guard kept from the original: an intro that reads "None" is dropped.
        if str(intro) != "None":
            body = str(intro) + str(text)
        else:
            body = str(text)

        newarticle.body = body.replace("\\n", " ")

        # Category
        try:
            category = soup.find("div", attrs={
                'class': 'article-category'
            }).get_text()
            label = category.lower()
            if label in _CATEGORY_BY_LABEL:
                newarticle.category = _CATEGORY_BY_LABEL[label]
            else:
                print("Unknown category: ", category)
        except Exception:
            print("No category")

        # Topic/tag import used to be sketched here; it is not performed.

        # Disqus
        newarticle.disqus_id = _disqus_id_from_html(html)

        newarticle.include_in_rss = False
        newarticle.exclude_from_list_views = True

        # Saving
        try:
            article_to_replace = Article.objects.get(slug=newarticle.slug)
        except Article.DoesNotExist:
            print("Saving as new article")
            newarticle.save()
        else:
            print("Updating existing article")
            newarticle.pk = article_to_replace.pk
            newarticle.created = article_to_replace.created
            newarticle.save()
Example #13
0
    def test_commissions(self):
        """Exercise the 'processinvoices' command over two invoicing rounds.

        Checks the number of commissions and invoices after each run while
        invoices are moved through statuses "-", "0" and "4", then prints
        every e-mail collected in ``mail.outbox``.
        """
        from django.core import management

        def run_command():
            management.call_command('processinvoices')

        def assign_fund(queryset):
            # Give each commission the same amount and the test fund.
            for entry in queryset:
                entry.commission_due = Decimal(900.00)
                entry.fund = reporters_fund
                entry.save()

        def move_to(queryset, new_status):
            # Store a new status on every invoice in the queryset.
            for entry in queryset:
                entry.status = new_status
                entry.save()

        def expect_invoices(expected, **lookup):
            self.assertEqual(len(Invoice.objects.filter(**lookup)), expected)

        def expect_unapproved(expected):
            pending = Commission.objects.filter(
                date_notified_approved__isnull=True)
            self.assertEqual(len(pending), expected)

        reporters_fund = Fund.objects.get(name="Bertha|Reporters")
        first_author = Author.objects.get(email="*****@*****.**")
        second_author = Author.objects.get(email="*****@*****.**")

        # Round one: the first run creates the commissions and invoices.
        run_command()
        every_commission = Commission.objects.all()
        self.assertEqual(len(every_commission), 6)
        assign_fund(every_commission)
        expect_unapproved(6)

        unprocessed = Invoice.objects.filter(status="-")
        self.assertEqual(len(unprocessed), 2)
        move_to(unprocessed, "0")

        run_command()
        expect_unapproved(0)
        self.assertEqual(len(Invoice.objects.all()), 2)
        expect_invoices(2, status="0")
        expect_invoices(0, status="4")
        move_to(Invoice.objects.filter(status="0"), "4")
        expect_invoices(2, status="4")
        expect_invoices(2, date_notified_payment__isnull=True)
        run_command()
        expect_invoices(2, date_notified_payment__isnull=False)

        # Round two: a new article by both authors.
        extra_article = Article()
        extra_article.title = "Test commission 5"
        extra_article.slug = "test-commission-5"
        extra_article.category = Category.objects.get(name="News")
        extra_article.published = timezone.now()
        extra_article.author_01 = first_author
        extra_article.author_02 = second_author
        extra_article.save()

        run_command()
        second_batch = Invoice.objects.filter(status="-")
        self.assertEqual(len(second_batch), 2)
        for entry in second_batch:
            self.assertEqual(entry.invoice_num, 2)
            entry.status = "0"
            entry.save()

        unfunded = Commission.objects.filter(fund__isnull=True)
        self.assertEqual(len(unfunded), 2)
        assign_fund(unfunded)
        # In-memory only: the new status is never saved.
        for entry in second_batch:
            entry.status = "4"
        run_command()
        expect_invoices(2, date_notified_payment__isnull=True)
        run_command()
        expect_invoices(2, date_notified_payment__isnull=False)

        for message in mail.outbox:
            print("From:", message.from_email)
            print("To:", message.to)
            print("Subject:", message.subject)
            print("Body:", message.body)
Example #14
0
    def setUp(self):
        """Create one fund, the News category, three authors and four articles."""

        def add_author(first_names, last_name, email, freelancer):
            person = Author()
            person.first_names = first_names
            person.last_name = last_name
            person.email = email
            person.freelancer = freelancer
            person.save()
            return person

        def add_article(title, slug, *authors):
            # Authors fill author_01, author_02, ... in the order given.
            story = Article()
            story.title = title
            story.slug = slug
            story.category = Category.objects.get(name="News")
            story.published = timezone.now()
            slots = ("author_01", "author_02", "author_03")
            for slot, person in zip(slots, authors):
                setattr(story, slot, person)
            story.save()

        reporters_fund = Fund()
        reporters_fund.name = "Bertha|Reporters"
        reporters_fund.save()

        news = Category()
        news.name = "News"
        news.slug = "news"
        news.save()

        # Two freelancers and one non-freelancer.
        bloggs = add_author("Joe", "Bloggs", "*****@*****.**", True)
        doe = add_author("Jane", "Doe", "*****@*****.**", True)
        lane = add_author("Lois", "Lane", "*****@*****.**", False)

        add_article("Test commission 1", "test-commission-1",
                    bloggs, doe, lane)
        add_article("Test commission 2", "test-commission-2", bloggs)
        add_article("Test commission 3", "test-commission-3", bloggs)
        add_article("Test commission 4", "test-commission-4", bloggs, doe)
Example #15
0
    def setUp(self):
        """Create a topic, five categories and two published articles."""
        government = Topic()
        government.name = "government"
        government.slug = "government"
        government.save()

        category_specs = (
            ("Feature", "feature"),
            ("Photo essay", "photo-essay"),
            ("Opinion", "opinion"),
            ("Photo", "photo"),
            ("News", "news"),
        )
        for label, slug in category_specs:
            entry = Category()
            entry.name = label
            entry.slug = slug
            entry.save()

        # News article with an external primary image.
        news_article = Article()
        news_article.title = "Test article 1"
        news_article.slug = "test-article-1"
        news_article.category = Category.objects.get(name="News")
        news_article.external_primary_image = \
            "http://www.w3schools.com/html/pic_mountain.jpg"
        news_article.save()
        news_article.publish_now()

        # Opinion article, category looked up by slug.
        opinion_article = Article()
        opinion_article.title = "Test article 2"
        opinion_article.slug = "test-article-2"
        opinion_article.category = Category.objects.get(slug="opinion")
        opinion_article.save()
        opinion_article.publish_now()
Example #16
0
 def test_published(self):
     """Only articles whose publish time has passed count as published."""
     baseline = Article.objects.published().count()

     # Published right now: the published count grows by one.
     current = Article()
     current.title = "Test article 3"
     current.slug = "test-article-3"
     current.category = Category.objects.get(name="News")
     current.published = timezone.now()
     current.save()
     self.assertEqual(baseline + 1, Article.objects.published().count())

     # Scheduled ten hours ahead: the count must not change.
     scheduled = Article()
     scheduled.title = "Test article 4"
     scheduled.category = Category.objects.get(name="News")
     scheduled.slug = "test-article-4"
     scheduled.published = timezone.now() + datetime.timedelta(hours=10)
     scheduled.save()
     self.assertEqual(baseline + 1, Article.objects.published().count())
     self.assertEqual(scheduled.is_published(), False)
Example #17
0
    def setUpTestData(cls):
        """Create a client, a topic, five categories, two articles and an author."""
        cls.client = Client()

        government = Topic()
        government.name = "government"
        government.slug = "government"
        government.save()

        category_specs = (
            ("Feature", "feature"),
            ("Photo essay", "photo-essay"),
            ("Opinion", "opinion"),
            ("Photo", "photo"),
            ("News", "news"),
        )
        for label, slug in category_specs:
            entry = Category()
            entry.name = label
            entry.slug = slug
            entry.save()

        # News article with body text and an external primary image.
        news_article = Article()
        news_article.title = "Test article 1"
        news_article.body = "<p>The quick brown fox jumps over the lazy dog.</p>"
        news_article.slug = "test-article-1"
        news_article.category = Category.objects.get(name="News")
        news_article.external_primary_image = \
            "http://www.w3schools.com/html/pic_mountain.jpg"
        news_article.save()
        news_article.publish_now()

        # Opinion article with a subtitle.
        opinion_article = Article()
        opinion_article.title = "Test article 2"
        opinion_article.subtitle = "Dogs and things"
        opinion_article.body = "<p>How now brown cow.</p>"
        opinion_article.slug = "test-article-2"
        opinion_article.category = Category.objects.get(slug="opinion")
        opinion_article.save()
        opinion_article.publish_now()

        # The author is attached to the opinion article after it is published.
        writer = Author()
        writer.first_names = "Joe"
        writer.last_name = "Bloggs"
        writer.email = "*****@*****.**"
        writer.save()
        opinion_article.author_01 = writer
        opinion_article.save()
Example #18
0
    def test_commissions(self):
        """Check commission and invoice counts across 'processinvoices' runs.

        Invoices are stepped through statuses "-", "0" and "4" for an initial
        set of articles and again after a fifth article is added; all e-mails
        in ``mail.outbox`` are printed at the end.
        """
        from django.core import management

        process_invoices = lambda: management.call_command('processinvoices')

        def count_invoices(**criteria):
            return len(Invoice.objects.filter(**criteria))

        def count_awaiting_approval():
            return len(Commission.objects.filter(
                date_notified_approved__isnull=True))

        test_fund = Fund.objects.get(name="Bertha|Reporters")
        writer_a = Author.objects.get(email="*****@*****.**")
        writer_b = Author.objects.get(email="*****@*****.**")

        process_invoices()
        created = Commission.objects.all()
        self.assertEqual(len(created), 6)
        for item in created:
            item.commission_due = Decimal(900.00)
            item.fund = test_fund
            item.save()
        self.assertEqual(count_awaiting_approval(), 6)

        fresh = Invoice.objects.filter(status="-")
        self.assertEqual(len(fresh), 2)
        for bill in fresh:
            bill.status = "0"
            bill.save()

        process_invoices()
        self.assertEqual(count_awaiting_approval(), 0)
        self.assertEqual(len(Invoice.objects.all()), 2)
        self.assertEqual(count_invoices(status="0"), 2)
        self.assertEqual(count_invoices(status="4"), 0)
        for bill in Invoice.objects.filter(status="0"):
            bill.status = "4"
            bill.save()
        self.assertEqual(count_invoices(status="4"), 2)
        self.assertEqual(count_invoices(date_notified_payment__isnull=True), 2)
        process_invoices()
        self.assertEqual(
            count_invoices(date_notified_payment__isnull=False), 2)

        # A fifth article by both authors starts a second invoicing round.
        fifth = Article()
        fifth.title = "Test commission 5"
        fifth.slug = "test-commission-5"
        fifth.category = Category.objects.get(name="News")
        fifth.published = timezone.now()
        fifth.author_01 = writer_a
        fifth.author_02 = writer_b
        fifth.save()

        process_invoices()
        follow_ups = Invoice.objects.filter(status="-")
        self.assertEqual(len(follow_ups), 2)
        for bill in follow_ups:
            self.assertEqual(bill.invoice_num, 2)
            bill.status = "0"
            bill.save()

        without_fund = Commission.objects.filter(fund__isnull=True)
        self.assertEqual(len(without_fund), 2)
        for item in without_fund:
            item.commission_due = Decimal(900.00)
            item.fund = test_fund
            item.save()
        # Status is changed on the loaded objects only; nothing is saved.
        for bill in follow_ups:
            bill.status = "4"
        process_invoices()
        self.assertEqual(count_invoices(date_notified_payment__isnull=True), 2)
        process_invoices()
        self.assertEqual(
            count_invoices(date_notified_payment__isnull=False), 2)

        for sent in mail.outbox:
            print("From:", sent.from_email)
            print("To:", sent.to)
            print("Subject:", sent.subject)
            print("Body:", sent.body)
Example #19
0
    def test_commissions(self):
        """Check invoice processing and bonus estimates for two authors.

        Runs 'processinvoices' repeatedly while moving invoices through
        statuses "-", "0" and "4", adds an eleventh article for a second
        round, and finally counts commissions with a positive estimated
        bonus per author.
        """
        from django.core import management

        def rerun():
            management.call_command('processinvoices')

        def fund_all(queryset):
            # Same amount and fund for every commission in the queryset.
            for record in queryset:
                record.commission_due = Decimal(900.00)
                record.fund = fund_obj
                record.save()

        def invoices_matching(**criteria):
            return len(Invoice.objects.filter(**criteria))

        def unapproved_commissions():
            return len(Commission.objects.filter(
                date_notified_approved__isnull=True))

        def bonuses_for(last_name):
            earned = Commission.objects.filter(
                invoice__author__last_name=last_name)
            return sum(1 for record in earned if record.estimate_bonus() > 0)

        fund_obj = Fund.objects.get(name="Bertha|Reporters")
        author_one = Author.objects.get(email="*****@*****.**")
        author_two = Author.objects.get(email="*****@*****.**")

        rerun()
        initial = Commission.objects.all()
        self.assertEqual(len(initial), 18)
        fund_all(initial)
        self.assertEqual(unapproved_commissions(), 18)

        new_invoices = Invoice.objects.filter(status="-")
        self.assertEqual(len(new_invoices), 2)
        for doc in new_invoices:
            doc.status = "0"
            doc.save()

        rerun()
        self.assertEqual(unapproved_commissions(), 0)
        self.assertEqual(len(Invoice.objects.all()), 2)
        self.assertEqual(invoices_matching(status="0"), 2)
        self.assertEqual(invoices_matching(status="4"), 0)
        for doc in Invoice.objects.filter(status="0"):
            doc.status = "4"
            doc.save()
        self.assertEqual(invoices_matching(status="4"), 2)
        self.assertEqual(
            invoices_matching(date_notified_payment__isnull=True), 2)
        rerun()
        self.assertEqual(
            invoices_matching(date_notified_payment__isnull=False), 2)

        # Second round, triggered by an additional article.
        eleventh = Article()
        eleventh.title = "Test commission 11"
        eleventh.slug = "test-commission-11"
        eleventh.category = Category.objects.get(name="News")
        eleventh.published = timezone.now()
        eleventh.author_01 = author_one
        eleventh.author_02 = author_two
        eleventh.save()

        rerun()
        round_two = Invoice.objects.filter(status="-")
        self.assertEqual(len(round_two), 2)
        for doc in round_two:
            self.assertEqual(doc.invoice_num, 2)
            doc.status = "0"
            doc.save()

        missing_fund = Commission.objects.filter(fund__isnull=True)
        self.assertEqual(len(missing_fund), 2)
        fund_all(missing_fund)
        # Only the loaded objects change here; save() is not called.
        for doc in round_two:
            doc.status = "4"
        rerun()
        self.assertEqual(
            invoices_matching(date_notified_payment__isnull=True), 2)
        rerun()
        self.assertEqual(
            invoices_matching(date_notified_payment__isnull=False), 2)

        self.assertEqual(bonuses_for("Bloggs"), 3)
        self.assertEqual(bonuses_for("Doe"), 0)
Example #20
0
    def test_commissions(self):
        """Run the invoice workflow, check bonuses, then request payment views.

        The first part repeatedly calls the 'processinvoices' management
        command while moving invoices through statuses "-", "0" and "4" and
        asserting the resulting commission/invoice counts. The second part
        counts commissions with a positive estimated bonus per author. The
        last part logs in a superuser and asserts that several invoice and
        commission URLs answer with HTTP 200.
        """
        fund = Fund.objects.get(name="Bertha|Reporters")
        author1 = Author.objects.get(email="*****@*****.**")
        author2 = Author.objects.get(email="*****@*****.**")
        from django.core import management
        # First run: 18 commissions are expected to exist afterwards.
        management.call_command('processinvoices')
        commissions = Commission.objects.all()
        self.assertEqual(len(commissions), 18)
        # Give every commission the same amount and fund.
        for commission in commissions:
            commission.commission_due = Decimal(900.00)
            commission.fund = fund
            commission.save()
        c = Commission.objects.filter(date_notified_approved__isnull=True)
        self.assertEqual(len(c), 18)

        # Move the two invoices with status "-" to status "0".
        invoices = Invoice.objects.filter(status="-")
        self.assertEqual(len(invoices), 2)
        for invoice in invoices:
            invoice.status = "0"
            invoice.save()

        # Second run: no commission is left without an approval notification.
        management.call_command('processinvoices')
        c = Commission.objects.filter(date_notified_approved__isnull=True)
        self.assertEqual(len(c), 0)
        invoices = Invoice.objects.all()
        self.assertEqual(len(invoices), 2)
        invoices = Invoice.objects.filter(status="0")
        self.assertEqual(len(invoices), 2)
        invoices = Invoice.objects.filter(status="4")
        self.assertEqual(len(invoices), 0)
        # Move both invoices on to status "4" and save them.
        invoices = Invoice.objects.filter(status="0")
        for invoice in invoices:
            invoice.status = "4"
            invoice.save()
        invoices = Invoice.objects.filter(status="4")
        self.assertEqual(len(invoices), 2)
        invoices = Invoice.objects.filter(date_notified_payment__isnull=True)
        self.assertEqual(len(invoices), 2)
        # Third run: both invoices now carry a payment notification date.
        management.call_command('processinvoices')
        invoices = Invoice.objects.filter(date_notified_payment__isnull=False)
        self.assertEqual(len(invoices), 2)

        # Second round: an additional article by both authors.
        article11 = Article()
        article11.title = "Test commission 11"
        article11.slug = "test-commission-11"
        article11.category = Category.objects.get(name="News")
        article11.published = timezone.now()
        article11.author_01 = author1
        article11.author_02 = author2
        article11.save()

        management.call_command('processinvoices')
        invoices = Invoice.objects.filter(status="-")
        self.assertEqual(len(invoices), 2)
        for invoice in invoices:
            # Each new invoice is expected to be number 2.
            self.assertEqual(invoice.invoice_num, 2)
            invoice.status = "0"
            invoice.save()

        commissions = Commission.objects.filter(fund__isnull=True)
        self.assertEqual(len(commissions), 2)
        for commission in commissions:
            commission.commission_due = Decimal(900.00)
            commission.fund = fund
            commission.save()
        # NOTE(review): status is set on the loaded objects but save() is not
        # called, so this loop does not write anything - confirm intent.
        for invoice in invoices:
            invoice.status = "4"
        management.call_command('processinvoices')
        invoices = Invoice.objects.filter(date_notified_payment__isnull=True)
        self.assertEqual(len(invoices), 2)
        management.call_command('processinvoices')
        invoices = Invoice.objects.filter(date_notified_payment__isnull=False)
        self.assertEqual(len(invoices), 2)

        # Count commissions with a positive estimated bonus for each author.
        commissions = Commission.objects.filter(
            invoice__author__last_name="Bloggs")
        num_bonuses = len([True for c in commissions if c.estimate_bonus() > 0])
        self.assertEqual(num_bonuses, 3)

        commissions = Commission.objects.filter(invoice__author__last_name="Doe")
        num_bonuses = len([True for c in commissions if c.estimate_bonus() > 0])
        self.assertEqual(num_bonuses, 0)

        # Create an active staff superuser and log in with the test client.
        user = User.objects.create_user('admin', '*****@*****.**', 'abcde')
        user.is_staff = True
        user.is_superuser = True
        user.is_active = True
        user.save()
        # Note: `c` is rebound here from a queryset/loop variable to a Client.
        c = Client()
        response = c.login(username='******', password='******')
        self.assertEqual(response, True)
        # Invoice list, once via its URL name and once via a literal path.
        url = reverse('payments:invoice.list')
        response = c.get(url)
        self.assertEqual(response.status_code, 200)
        url = '/invoices/2000/1/2020/9/0/'
        response = c.get(url)
        self.assertEqual(response.status_code, 200)

        # Invoice pages are addressed as "<author pk>-<invoice number>".
        invoice = Invoice.objects.all()[0]
        response = c.get('/invoices/' + str(invoice.author.pk) + '-' +
                         str(invoice.invoice_num))
        self.assertEqual(response.status_code, 200)
        response = c.get('/invoices/print/' + str(invoice.author.pk) + '-' +
                         str(invoice.invoice_num))
        self.assertEqual(response.status_code, 200)
        # Commission pages: one by primary key, plus 'add' and 'analysis'.
        commission = Commission.objects.all()[0]
        response = c.get('/commissions/' + str(commission.pk))
        self.assertEqual(response.status_code, 200)
        response = c.get('/commissions/add')
        self.assertEqual(response.status_code, 200)
        response = c.get('/commissions/analysis')
        self.assertEqual(response.status_code, 200)