def test_updated_no_date():
    """An item that carries only an id must produce a falsy ``updated``."""
    payload = dict(id="123")
    item_parser = JsonItemParser(payload)

    parsed_entry = Entry()
    parsed_entry.updated = item_parser.updated(payload)
    assert not parsed_entry.updated
Example #2
0
def test_entry_authors_added_on_update(session, feed):
    """Re-parsing an existing entry attaches the item's author and new title."""
    entry = Entry(
        title="Title", feed=feed, link="http://test.com/entry1", guid="qwertyuiop"
    )

    # Before parsing, the entry has neither authors nor an author string.
    initial_authorstring = entry.create_author_string()
    assert initial_authorstring == ""
    assert len(entry.authors) == 0

    author_data = dict(name="Test Author2", email="*****@*****.**")
    item = dict(authors=[author_data], title="Title 2", link=entry.link)

    # Sanity check: the incoming title really differs from the stored one.
    assert entry.title != item["title"]

    item_parser = RssItemParser(item, feed, entry=entry)
    parsed_entry, _parsed_authors = item_parser.parse()

    assert parsed_entry is not None
    assert len(parsed_entry.authors) == 1
    added_author = parsed_entry.authors[0]
    assert added_author.name == author_data["name"]
    assert added_author.email == author_data["email"]

    assert entry.create_author_string() == author_data["name"]

    assert entry.title == item["title"]
Example #3
0
    def test_updated_no_date(self):
        """An empty item must produce ``None`` for ``updated``."""
        payload = dict()
        item_parser = JsonItemParser(payload, self.feed)

        parsed_entry = Entry()
        parsed_entry.updated = item_parser.updated(payload)

        self.assertIsNone(parsed_entry.updated)
Example #4
0
 def clean_html(self, ids):
     """
     Re-clean the stored content, summary and title of the given entries.

     Each entry whose id is in ``ids`` is run through
     ``Entry.clean_images`` (content and summary) and
     ``Entry.clean_title`` (title), then everything is committed at once.

     :param ids: ids of the entries to clean
     """
     selected = Entry.query.filter(Entry.id.in_(ids)).all()
     entry_names = u", ".join(str(record) for record in selected)
     app.logger.info(u"Admin Cleaning HTML for Entries: {0}".format(entry_names))
     for record in selected:
         record.content = Entry.clean_images(record.content)
         record.summary = Entry.clean_images(record.summary)
         record.title = Entry.clean_title(record.title)
         db.session.add(record)
     db.session.commit()
def test_updated():
    """A unix-timestamp string in ``updated`` parses to the matching datetime."""
    unix_timestamp = "1448928000"
    payload = dict(updated=unix_timestamp)
    item_parser = JsonItemParser(payload)

    parsed_entry = Entry()
    parsed_entry.updated = item_parser.updated(payload)

    assert parsed_entry.updated
    expected = datetime(2015, 12, 1)
    TestCase().assertAlmostEqual(
        parsed_entry.updated, expected, delta=timedelta(seconds=1)
    )
def test_published_no_date():
    """Without any date in the item, ``published`` is close to the current UTC time."""
    payload = dict(id="123")
    item_parser = JsonItemParser(payload)

    parsed_entry = Entry()
    parsed_entry.published = item_parser.published(payload)

    assert parsed_entry.published

    # Sample the clock only after parsing so the one-second tolerance holds.
    now = datetime.utcnow()
    TestCase().assertAlmostEqual(
        parsed_entry.published, now, delta=timedelta(seconds=1)
    )
Example #7
0
    def test_published_no_date(self):
        """An empty item yields a ``published`` value close to the current UTC time."""
        payload = dict()
        item_parser = JsonItemParser(payload, self.feed)

        parsed_entry = Entry()
        parsed_entry.published = item_parser.published(payload)

        self.assertIsNotNone(parsed_entry.published)

        # Sample the clock only after parsing so the one-second tolerance holds.
        now = datetime.utcnow()
        self.assertAlmostEqual(
            parsed_entry.published, now, delta=timedelta(seconds=1)
        )
Example #8
0
    def test_published_updated(self):
        """With only ``updated`` present, ``published`` takes that timestamp."""
        unix_timestamp = '1448928000'
        payload = dict(updated=unix_timestamp)
        item_parser = JsonItemParser(payload, self.feed)

        parsed_entry = Entry()
        parsed_entry.published = item_parser.published(payload)

        self.assertIsNotNone(parsed_entry.published)
        expected = datetime(2015, 12, 1)
        self.assertAlmostEqual(
            parsed_entry.published, expected, delta=timedelta(seconds=1)
        )
Example #9
0
def test_parser_base_new_entries(session):
    """``new_entries`` keeps the entries whose ``is_update`` was not set to True."""
    updated_entry = Entry()
    updated_entry.is_update = True
    explicit_new_entry = Entry()
    explicit_new_entry.is_update = False
    # Left with whatever is_update value a fresh Entry has.
    untouched_entry = Entry()

    candidates = {updated_entry, explicit_new_entry, untouched_entry}
    parser = RssParser()

    result = parser.new_entries(candidates)
    assert len(result) == 2
    for expected in (explicit_new_entry, untouched_entry):
        assert expected in result
Example #10
0
 def clean_html(self, ids):
     """
     Re-clean the stored HTML of the given entries and report the outcome.

     Content and summary go through ``Entry.clean_content``, the title
     through ``Entry.clean_title``, and the summary is then regenerated
     with ``create_summary``. A success or error message is flashed.

     :param ids: ids of the entries to clean
     """
     try:
         selected = Entry.query.filter(Entry.id.in_(ids)).all()
         app.logger.info(
             "Admin Cleaning HTML for Entries: %s", stringify_list(selected)
         )
         for record in selected:
             record.content = Entry.clean_content(record.content)
             record.summary = Entry.clean_content(record.summary)
             record.title = Entry.clean_title(record.title)
             record.create_summary()
             db.session.add(record)
         db.session.commit()
         flash(f"HTML was successfully cleaned for {len(ids)} Entries.", "success")
     except Exception as error:
         # Re-raise unless the view's own exception handler accepts it.
         handled = self.handle_view_exception(error)
         if not handled:
             raise
         flash(f"Failed to clean Entry HTML. {error}", "error")
Example #11
0
    def title(self, item):
        """
        Reads the item's title and runs it through ``Entry.clean_title``.

        A missing title is passed to ``Entry.clean_title`` as None.

        :param item: deserialized JSON item
        :type item: dict
        :return: str
        """
        return Entry.clean_title(item.get('title'))
Example #12
0
    def title(self, item: Dict) -> str:
        """
        Reads the item's title and runs it through ``Entry.clean_title``.

        A missing title is passed to ``Entry.clean_title`` as an empty string.

        :param item: Feedparser entry
        :type item: Dict
        :return: str
        """
        raw_title = item.get("title", "")
        cleaned = Entry.clean_title(raw_title)
        return cleaned
Example #13
0
    def title(self, item):
        """
        Reads the item's title and runs it through ``Entry.clean_title``.

        A missing title is passed to ``Entry.clean_title`` as None.

        :param item: Feedparser entry
        :type item: dict
        :return: str
        """
        return Entry.clean_title(item.get('title'))
Example #14
0
    def title(self, item: Dict) -> str:
        """
        Reads the item's title, cleaning it only when it is non-empty.

        A falsy title (missing, empty, or None) is returned unchanged
        without calling ``Entry.clean_title``.

        :param item: deserialized JSON item
        :type item: Dict
        :return: str
        """
        raw_title = item.get("title", "")
        if not raw_title:
            return raw_title
        return Entry.clean_title(raw_title)
Example #15
0
    def summary(self, item):
        """
        Reads the item's summary, cleaning it only when it is non-empty.

        A falsy summary (missing or empty) is returned unchanged without
        calling ``Entry.clean_images``.

        :param item: Feedparser entry
        :type item: dict
        :return: str
        """
        text = item.get('summary')
        if not text:
            return text
        return Entry.clean_images(text)
Example #16
0
def test_parse_wapo_feed(feed, rss_parser, wapo_rss):
    """Parsing the WaPo fixture creates new entries and updates the stored one."""
    known_guid = (
        "https://www.washingtonpost.com/business/economy/trump-rewrites-gop-playbook-in-his"
        "-own-image/2018/02/11/8505873c-0dec-11e8-8890-372e2047c935_story.html"
    )
    original_published = datetime(2018, 1, 1)
    stored_entry = Entry(feed=feed, guid=known_guid, published=original_published)
    stored_entry.save()

    # Baseline: the stored entry has never been updated and has no authors.
    assert not stored_entry.updated
    assert stored_entry.published == original_published
    assert len(stored_entry.authors) == 0

    rss_parser.feed = feed
    rss_parser.data = wapo_rss
    rss_parser.parse()

    entries = rss_parser.entries
    authors = rss_parser.authors

    assert len(entries) == 3
    assert len(authors) == 3

    # A newly created entry, located by its author string.
    new_entry = next(
        (candidate for candidate in entries if candidate.authorstring == "Liz Clarke"),
        None,
    )
    assert new_entry
    assert new_entry.link
    assert new_entry.published
    assert not new_entry.updated
    assert len(new_entry.authors) == 1
    assert new_entry.authors[0].name == "Liz Clarke"

    # The pre-existing entry keeps its published date but gains an update.
    refreshed_entry = next(
        (candidate for candidate in entries if candidate.guid == known_guid), None
    )
    assert refreshed_entry
    assert refreshed_entry.link
    assert refreshed_entry.published == original_published
    assert refreshed_entry.updated
    assert refreshed_entry.updated != original_published
    assert len(refreshed_entry.authors) == 2
Example #17
0
def test_entry_authors_updated(session, feed):
    """Re-parsing an entry that already has an author appends the new one."""
    existing_author = AuthorFactory()
    entry = Entry(title="Title", feed=feed, link="http://test.com", guid="asdfghjkl")
    entry.authors.append(existing_author)

    original_authorstring = entry.create_author_string()

    incoming_author = dict(name="Jane Doe", email="*****@*****.**")
    item = dict(authors=[incoming_author], title="Title 2", link=entry.link)

    item_parser = RssItemParser(item, feed, entry=entry)
    parsed_entry, _parsed_authors = item_parser.parse()

    assert parsed_entry is not None
    assert len(parsed_entry.authors) == 2
    first_author, second_author = parsed_entry.authors[0], parsed_entry.authors[1]
    assert first_author == existing_author
    assert second_author.name == incoming_author["name"]

    assert len(entry.authors) == 2

    expected_authorstring = original_authorstring + " and " + incoming_author["name"]
    assert entry.create_author_string() == expected_authorstring

    assert entry.title == item["title"]
Example #18
0
    def content(self, item):
        """
        Reads the item's content, falling back to its summary when the
        content is None. A non-empty result is run through
        ``Entry.clean_images``; a falsy one is returned unchanged.

        :param item: deserialized JSON item
        :type item: dict
        :return: str
        """
        body = item.get('content')
        if body is None:
            body = item.get('summary')
        return Entry.clean_images(body) if body else body
Example #19
0
    def content(self, item):
        """
        Gets the content of the item. If content is None, try
        using the summary.

        When content is present, the text is read from the ``value`` of
        its first element. If the content cannot be read that way (empty
        sequence, wrong type, element without ``get``), the result is None.
        A non-empty result is run through ``Entry.clean_images``.

        :param item: Feedparser entry
        :type item: dict
        :return: str, or None when no usable content was found
        """
        raw_content = item.get('content')
        if raw_content is None:
            content = item.get('summary', None)
        else:
            try:
                content = raw_content[0].get('value')
            except (AttributeError, IndexError, KeyError, TypeError):
                # Malformed content structure: treat it as "no content".
                # Only lookup errors are caught so that unrelated failures
                # (e.g. KeyboardInterrupt) are no longer silently swallowed.
                content = None
        if content:
            content = Entry.clean_images(content)
        return content
Example #20
0
def test_clean_content(client):
    """Cleaning the ``entry_content.xml`` fixture leaves only the bare paragraph."""
    with open(TEST_FILES_DIR + "entry_content.xml", "r") as fixture:
        raw_markup = fixture.read()

    result = Entry.clean_content(raw_markup, parser=bs4_parser())
    # Newlines and surrounding whitespace are ignored in the comparison.
    flattened = result.replace("\n", "").strip()
    assert flattened == "<div><p>Testing</p></div>"
Example #21
0
def test_entry_remove_empty_tags(client):
    """``remove_empty_tags`` drops the span that holds only whitespace and an empty tag."""
    markup = "<div><p>Testing<span>\n<o:p></o:p></span></p></div>"
    tree = BeautifulSoup(markup, bs4_parser())
    Entry.remove_empty_tags(tree)
    rendered = str(tree)
    assert rendered == "<div><p>Testing</p></div>"
Example #22
0
def test_entry_remove_comments(client):
    """``remove_comments`` strips HTML comments both inside and outside elements."""
    markup = "<div><p><!-- Comment -->Testing</p></div><!--Comment-->"
    tree = BeautifulSoup(markup, bs4_parser())
    Entry.remove_comments(tree)
    rendered = str(tree)
    assert rendered == "<div><p>Testing</p></div>"
Example #23
0
def test_entry_remove_style(client):
    """``remove_unwanted_elements`` strips style/class attributes and ``<style>`` tags."""
    markup = '<div style="font-size:50px;" class="testing"><p style="color:blue;">Testing<style>Hello</style></p></div>'
    tree = BeautifulSoup(markup, bs4_parser())
    Entry.remove_unwanted_elements(tree)
    rendered = str(tree)
    assert rendered == "<div><p>Testing</p></div>"
Example #24
0
    def handle_notification(self, feed, data):
        """Handles PuSH notifications in RSS and Atom format.

        Updates the feed's title and description from ``data.feed``, then
        creates or updates one ``Entry`` per item in ``data.entries``. Each
        processed entry is appended to ``self.entries`` and added to the
        database session; nothing is committed here.

        :param feed: feed the notification belongs to
        :param data: parsed notification exposing ``feed`` and ``entries``
        :return: None
        """

        app.logger.info(u'Handling RSS notification for {0}'.format(feed))

        if data.feed is not None:
            feed_title = data.feed.get('title')
            if feed_title:
                feed.title = feed_title

            # Prefer the subtitle; fall back to the description.
            feed_description = (data.feed.get('subtitle')
                                or data.feed.get('description'))
            if feed_description:
                feed.description = feed_description

        db.session.add(feed)

        if data.entries is None:
            app.logger.warning(u'No entries in notification for {0}'
                               .format(feed))
            return

        # Errors raised when a parsed date is missing or out of range.
        date_errors = (TypeError, ValueError, OverflowError, OSError)

        for item in data.entries:
            # ``.get`` returns None instead of raising KeyError, so the
            # fallback to the link has to be an explicit None check.
            itemId = item.get('id')
            if itemId is None:
                itemId = item.get('link')

            if itemId is None:
                app.logger.warning(u'Could not get itemId for item {0} in '
                                   'feed {1}'.format(item, feed))
                continue

            entry = Entry.query.filter_by(guid=itemId).first()
            if entry is None:
                entry = Entry(feed=feed)

            try:
                entry.published = datetime.fromtimestamp(
                    mktime(item.get('published_parsed')))
            except date_errors:
                # Best effort: leave the current value when no usable date.
                pass
            try:
                entry.updated = datetime.fromtimestamp(
                    mktime(item.get('updated_parsed')))
            except date_errors:
                # Best effort: leave the current value when no usable date.
                pass
            entry.title = item.get('title')
            entry.guid = itemId
            entry.link = item.get('link')
            if item.get('content') is None:
                # No content: the summary doubles as the content.
                entry.content = item.get('summary')
            else:
                content = item.get('content')[0]
                entry.content = content.get('value')
                entry.summary = item.get('summary')

            self.add_authors(entry, item.get('author'))

            entry.get_wordcount()

            app.logger.info(u'Adding entry <{0}>'.format(entry))
            self.entries.append(entry)
            db.session.add(entry)
Example #25
0
def create_dev_data(user):
    """
    Populate the database with sample development records owned by ``user``.

    Creates one feed, two entries, two authors, a subscription covering
    four periods, two recommendations and one email, then commits them.

    :param user: user that owns the feed, subscription and email
    """
    from faker import Factory as FakerFactory

    fake = FakerFactory.create()

    sample_feed = Feed(
        topic="http://test.com/feed",
        hub="http://push.hub.com",
        site_url="http://test.com",
        title="Test Feed",
        description="A test feed",
        site_name="TestFeed.com",
        user=user,
    )
    db.session.add(sample_feed)

    first_entry = Entry(
        title="Test Entry",
        guid="http://test.com/feed/12345345234",
        content=list_to_html_paragraphs(fake.paragraphs(nb=5)),
        published=datetime(2017, 1, 1),
        site="TestFeed.com",
    )
    db.session.add(first_entry)

    second_entry = Entry(
        title="Another Test Entry",
        guid="http://test.com/feed/346546gsdfgd",
        content=list_to_html_paragraphs(fake.paragraphs()),
        published=datetime(2017, 2, 1),
        site="TestFeed.com",
    )
    db.session.add(second_entry)

    first_author = Author(
        givenname="Testy",
        familyname="McTesterson",
        name="Testy McTesterson",
        email="*****@*****.**",
        url="http://test.com/authors/testy",
    )
    db.session.add(first_author)

    second_author = Author(
        givenname="John",
        familyname="Doe",
        name="John Doe",
        email="*****@*****.**",
        url="http://test.com/authors/johndoe",
    )
    db.session.add(second_author)

    first_entry.add_authors([first_author])
    second_entry.add_authors([first_author, second_author])

    subscription = Subscription(user=user, author=first_author, active=True)
    for period_name in (
        PERIOD.DAILY,
        PERIOD.IMMEDIATE,
        PERIOD.WEEKLY,
        PERIOD.MONTHLY,
    ):
        subscription.add_period(period_name)

    db.session.add(subscription)

    # Build both recommendations first, then add them in the same order.
    recommendations = [
        Recommended(author=recommended_author, active=True)
        for recommended_author in (first_author, second_author)
    ]
    for recommendation in recommendations:
        db.session.add(recommendation)

    daily_period = Period.query.filter_by(name=PERIOD.DAILY).first()
    sample_email = Email(
        user=user,
        period=daily_period,
        authors=[first_author, second_author],
        entries=[first_entry, second_entry],
        address=user.email,
    )
    db.session.add(sample_email)

    db.session.commit()
Example #26
0
 def test_entry_clean_title(self):
     """``clean_title`` leaves a title starting with U+2019 unchanged."""
     raw_title = "\u2019 Test Title"
     cleaned = Entry.clean_title(raw_title)
     self.assertEqual(cleaned, '\u2019 Test Title')
Example #27
0
def test_entry_clean_title(client):
    """``clean_title`` leaves a title starting with U+2019 unchanged."""
    raw_title = "\u2019 Test Title"
    cleaned = Entry.clean_title(raw_title)
    assert cleaned == "\u2019 Test Title"
Example #28
0
    def handle_json_notification(self, feed, data):
        """Handles PuSH notifications in JSON format

        Updates the feed's title and description from ``data``, then
        creates or updates one ``Entry`` per element of ``data['items']``.
        Each processed entry is appended to ``self.entries`` and added to
        the database session; nothing is committed here.

        :param feed: feed the notification belongs to
        :param data: deserialized JSON notification
        :type data: dict
        :return: None
        """

        app.logger.info(u'Handling JSON notification for {0}'.format(feed))

        feed_title = data.get('title')
        if feed_title:
            feed.title = feed_title
        feed_description = data.get('description')
        if feed_description:
            feed.description = feed_description

        db.session.add(feed)

        items = data.get('items')
        if items is None:
            app.logger.warning(u'No entries in notification for {0}'
                               .format(feed))
            return

        # Errors raised when a timestamp is missing, mistyped or out of range.
        date_errors = (TypeError, ValueError, OverflowError, OSError)

        for item in items:
            # ``.get`` returns None instead of raising KeyError, so the
            # fallback to the link has to be an explicit None check.
            itemId = item.get('id')
            if itemId is None:
                itemId = item.get('link')

            if itemId is None:
                app.logger.warning(u'Could not get itemId for item {0} in '
                                   'feed {1}'.format(item, feed))
                continue

            entry = Entry.query.filter_by(guid=itemId).first()
            if entry is None:
                entry = Entry(feed=feed)

            entry.title = item.get('title')
            entry.guid = itemId
            try:
                entry.published = datetime.fromtimestamp(item.get('published'))
            except date_errors:
                # Best effort: leave the current value when no usable date.
                pass
            try:
                entry.updated = datetime.fromtimestamp(item.get('updated'))
            except date_errors:
                # Best effort: leave the current value when no usable date.
                pass
            if item.get('content') is None:
                # No content: the summary doubles as the content.
                entry.content = item.get('summary')
            else:
                entry.content = item.get('content')
                entry.summary = item.get('summary')

            entry.link = item.get('permalinkUrl')

            # NOTE(review): this looks up the literal key 'actor.displayname',
            # not a nested item['actor'][...] value - confirm that the
            # payload really is flattened this way.
            self.add_authors(entry, item.get('actor.displayname'))

            entry.get_wordcount()

            app.logger.info(u'Adding entry <{0}>'.format(entry))
            self.entries.append(entry)
            db.session.add(entry)
Example #29
0
def test_entry_remove_feedflare(client):
    """``remove_feedflare`` drops the ``feedflare`` div and keeps the rest."""
    markup = '<div><p>Testing</p></div><div class="feedflare"><p>Feedflare</p></div>'
    tree = BeautifulSoup(markup, bs4_parser())
    Entry.remove_feedflare(tree)
    rendered = str(tree)
    assert rendered == "<div><p>Testing</p></div>"