Example #1
0
    def test_removeEntryByIndex(self):
        """A single-entry feed is empty again after remove_entry(0)."""
        generator = FeedGenerator()
        self.feedId = 'http://example.com'
        self.title = 'Some Testfeed'

        episode = generator.add_entry()
        episode.id('http://lernfunk.de/media/654321/1')
        episode.title('The Third Episode')
        assert len(generator.entry()) == 1
        generator.remove_entry(0)
        assert len(generator.entry()) == 0
Example #2
0
    def test_removeEntryByIndex(self):
        """Removing by numeric index shrinks the entry list from one to zero."""
        feed = FeedGenerator()
        self.feedId = 'http://example.com'
        self.title = 'Some Testfeed'

        new_entry = feed.add_entry()
        new_entry.id('http://lernfunk.de/media/654321/1')
        new_entry.title('The Third Episode')
        assert len(feed.entry()) == 1
        feed.remove_entry(0)
        assert len(feed.entry()) == 0
Example #3
0
class Box:
    """Wrap a FeedGenerator and keep a link-keyed cache to de-duplicate articles."""

    def __init__(self, config=None):
        """Initialize the feed; *config* may override title/description/url."""
        if config is None:
            config = {}
        self.fg = FeedGenerator()
        # dict.get with a default is the idiomatic form of the original
        # "value if 'key' in config else default" ternaries.
        self.fg.title(config.get('title', 'PyFeed'))
        self.fg.description(config.get('description', 'github.com/BugenZhao/PyFeed'))
        self.fg.author({'name': 'Bugen Zhao', 'email': '*****@*****.**'})
        self.fg.link(href=config.get('url', 'github.com/BugenZhao/PyFeed'))
        self.fg.language('zh-CN')

        self.dict = {}  # article.link -> Article; drives new/update/skip decisions
        self.version = 'v0.3'

    @staticmethod
    def _populate(fe, article: Article):
        """Copy *article*'s fields onto feed entry *fe* (shared by add/update)."""
        fe.id(article.link)
        fe.link(href=article.link)
        fe.title(article.title)
        fe.description(article.description)
        # Assumes article.date is naive and is pinned to Asia/Shanghai — TODO confirm.
        fe.pubDate(article.date.replace(tzinfo=gettz("Asia/Shanghai")))

    def add_article(self, article: Article):
        """Append *article* as a brand-new feed entry and refresh the feed timestamp."""
        fe = self.fg.add_entry()
        self._populate(fe, article)
        self.fg.updated()

    def update_article(self, article: Article):
        """Rebuild the entry for *article* and hand it to the generator.

        NOTE(review): feedgen's ``entry(..., replace=True)`` replaces the feed's
        whole entry list, not just the matching entry — confirm this is intended.
        """
        fe = FeedEntry()
        self._populate(fe, article)
        self.fg.entry(fe, replace=True)
        self.fg.updated()

    def article(self, article: Article):
        """Route *article* to add/update/skip based on the link cache."""
        if article.link not in self.dict:
            logging.info('New article: ' + article.title)
            self.add_article(article)
            self.dict[article.link] = article
        elif article != self.dict[article.link]:
            logging.info('Update article: ' + article.title)
            self.update_article(article)
            self.dict[article.link] = article
        else:
            logging.info('Article already existed: ' + article.title)

    def rss_file(self, filename):
        """Write the feed to *filename* as pretty-printed RSS."""
        self.fg.rss_file(filename, pretty=True)

    def size(self):
        """Return the number of distinct articles seen so far."""
        return len(self.dict)
Example #4
0
class UQRSS:
    """RSS feed for Urgent Quests."""

    PRIOR_MINS = timedelta(minutes=30)
    # Cutoff: rows dated later than now + 30 minutes are skipped.
    EVENT_TIME = str(config.NOW + PRIOR_MINS)

    def __init__(self) -> None:
        """Initialize the RSS generation first by initializing feedgen."""
        self.fg = FeedGenerator()
        self.fg.title('PSO2 News: Urgent Quests')
        self.fg.author(AUTHOR)
        self.fg.description('Phantasy Star Online 2 News tracker for the West')
        self.fg.link(
            href='https://pso2.com/news/urgent-quests',
            rel='alternate',
            )
        self.fg.link(
            href=LINK,
            rel='self',
            )
        self.fg.language('en-US')

    def generate_feed(self) -> None:
        """Generate a feed by going through the database.

        Emits entries starting at the first row dated at/before EVENT_TIME,
        stopping once the index advances more than 10 past that first row.
        """
        config.CURSOR.execute('SELECT * FROM UQ')
        results = sorted(config.CURSOR.fetchall(), reverse=True)
        # BUG FIX: the original used 0 as the "not set" sentinel, so when the
        # first qualifying row was at index 0 the `start == 0` branch kept
        # re-firing and `start` advanced every iteration, defeating the entry
        # cap. Use None as the sentinel instead.
        start = None
        for n, (dt_str, uq, _title, url) in enumerate(results):
            if dt_str > self.EVENT_TIME:
                continue  # row is beyond the cutoff; skip it
            if start is None:
                start = n  # first qualifying row
            elif n - start > 10:
                return  # cap the feed length
            entry = self.fg.add_entry()
            entry.title(uq)
            entry.author(AUTHOR)
            #entry.description(uq)
            entry.link(href=url)
            entry.guid(f'{dt_str}/{uq}')
            entry.pubDate(pendulum.parse(dt_str))

    def write_feed(self) -> None:
        """Write out the feed after generating entries."""
        if len(self.fg.entry()) > 0:
            self.fg.rss_file('uq.xml')
        else:
            config.LOGGER.error('No entries were generated!')
Example #5
0
async def generate_nhk_easy_news_feed(
    entry_count=ENTRY_COUNT,
    return_content_only=False,
):
    """Build an Atom feed of NHK Easy News articles surfaced on Reddit.

    Pulls up to *entry_count* posts from /r/NHKEasyNews, extracts each post's
    NHK article URL, renders the article page, and adds one feed entry per
    article. When *return_content_only* is true, returns the concatenated
    entry HTML instead of the Atom XML bytes.

    Raises:
        Exception: if no feed entries were generated at all.
        NoArticleBodyError: if an article still has no body after all retries.
    """
    fg = FeedGenerator()
    fg.id('https://www.reddit.com/r/NHKEasyNews')
    fg.title('NHK Easy News')
    fg.language('ja')

    feed = feedparser.parse(
        'https://www.reddit.com/r/NHKEasyNews.rss?limit={}'
        .format(entry_count))

    reddit = praw.Reddit(
        client_id=settings.REDDIT_CLIENT_ID,
        client_secret=settings.REDDIT_CLIENT_SECRET,
        username=settings.REDDIT_CLIENT_USERNAME,
        password=settings.REDDIT_CLIENT_PASSWORD,
        user_agent='Manabi Reader',
    )

    entries = []
    for post in reversed(feed.entries):
        # Skip posts that mention the subreddit's Discord server.
        if 'discord server' in post.title.lower():
            continue

        reddit_content = post.content[0].value
        nhk_url_match = re.search(
            r'(http://www3.nhk.or.jp/news/easy/.*?\.html)', reddit_content)
        if nhk_url_match is None:
            continue
        nhk_url = nhk_url_match.group()

        entry = None  # guards against UnboundLocalError if no attempt runs
        for attempt in range(ATTEMPTS_PER_ENTRY):
            # NOTE(review): when NoArticleBodyError triggers a retry, this
            # session is never closed — consider try/finally; confirm intended.
            session = AsyncHTMLSession()
            r = await session.get(nhk_url, timeout=60)
            await r.html.arender(keep_page=True)

            try:
                entry = await _process_and_add_entry(
                    post, nhk_url, r, fg, reddit)
            except NoArticleBodyError:
                if attempt < ATTEMPTS_PER_ENTRY - 1:
                    continue  # retry rendering the article
                raise

            if entry is not None:
                entries.append(entry)

                #r.html.page.close()
                await session.close()
                break

    if return_content_only:
        html = ''
        for entry in reversed(entries):
            title = entry.title()
            content = entry.content()['content']
            html += f'<h1>{title}</h1>{content}'
        return html

    # BUG FIX: the original compared the entry *list* to 0 (`fg.entry() == 0`),
    # which is always False, so an empty feed was never detected.
    if not fg.entry():
        raise Exception("Generated zero feed entries from NHK Easy News.")

    return fg.atom_str(pretty=True, encoding='utf-8')
Example #6
0
 def test_setEntries(self):
     """Seeding a fresh generator with an entry list copies all three entries."""
     clone = FeedGenerator()
     clone.entry(self.fg.entry())
     assert len(clone.entry()) == 3
     assert self.fg.entry() == clone.entry()
Example #7
0
 def test_setEntries(self):
     """entry(list) on an empty generator adopts the source feed's entries."""
     other_feed = FeedGenerator()
     other_feed.entry(self.fg.entry())
     assert len(other_feed.entry()) == 3
     assert other_feed.entry() == self.fg.entry()
Example #8
0
    """

    entry = fg.add_entry()
    entry.title(article['title'])
    entry.author({'name': 'Tree of Savior'})
    entry.description(article['title'])
    entry.link(href=article['url'])
    entry.guid(article['url'])
    entry.pubDate(article['date'])
    return


if __name__ == '__main__':
    # Assemble feed metadata for the International Tree of Savior servers.
    fg = FeedGenerator()
    fg.title('Tree of Savior News')
    fg.author({'name': 'IMC Games'})
    fg.description('News for the International Tree of Savior Servers')
    fg.link(href='https://treeofsavior.com/page/news/', rel='alternate')
    fg.link(href='https://dark-nova.me/tos/feed.xml', rel='self')
    fg.logo('https://dark-nova.me/tos/feed.png')
    fg.language('en-US')

    # One feed entry per scraped news article.
    all_news = scraper.get_news()
    for article in all_news:
        populate_item(fg, article)

    if len(fg.entry()) > 0:
        fg.rss_file('feed.xml')
    else:
        # FIX: dropped the pointless f-prefix — the literal has no
        # placeholders (ruff F541); the logged string is unchanged.
        scraper.logger.error('Could not generate entries for feed')