def get_data_from_page(self, page, url):
    """Parse a fetched page into a feed and stamp the fetch-time cache.

    ``page + ''`` raises TypeError unless ``page`` is already a text
    string; in that case the payload is coerced with ``str()`` first.
    Returns the parsed feed.
    """
    try:
        result = parse_feed(StringIO(page + ''))
    except TypeError:
        # Non-string payload: coerce and retry.
        result = parse_feed(StringIO(str(page)))
    # Remember when this URL was last fetched successfully.
    self.fact.cache[url] = time.time()
    return result
def get_data_from_page(self, page_content, url):
    """Process a downloaded page: cache the fetch time and return either
    the raw page (for "pages" facts) or the parsed feed.

    A falsy ``page_content`` means the conditional GET found nothing new,
    so nothing is cached and None is returned.
    """
    if not page_content:
        # empty result from ConditionalGetPage when Last-Modified header not changed
        return
    self.fact.cache[url] = time.time()
    if self.fact.name == "pages":
        # Raw-page facts skip feed parsing entirely.
        return page_content
    try:
        return parse_feed(StringIO(page_content + ''))
    except TypeError:
        # Payload was not a text string; coerce before parsing.
        return parse_feed(StringIO(str(page_content)))
def get_data_from_page(self, page_content, url):
    """Turn a freshly downloaded page into usable data.

    :param page_content: raw page body, or a falsy value when the page
        was not re-downloaded (conditional GET: content unchanged).
    :param url: source URL of the page; used as the cache key.
    :returns: the raw page for "pages" facts, a parsed feed otherwise,
        or None when there is nothing new to process.
    """
    if not page_content:
        # empty result from ConditionalGetPage when Last-Modified header not changed
        return
    # Record when this URL was last successfully fetched.
    self.fact.cache[url] = time.time()
    if self.fact.name == "pages":
        # Raw-page facts consume the body as-is; no feed parsing.
        return page_content
    try:
        # `page_content + ''` raises TypeError unless it is already str.
        feed = parse_feed(StringIO(page_content+''))
    except TypeError:
        feed = parse_feed(StringIO(str(page_content)))
    return feed
def parse_tweets(raw_tweets, source, now=None):
    """
    Parses a list of raw tweet lines from a twtxt file and returns a list of
    :class:`Tweet` objects.

    :param list raw_tweets: list of raw tweet lines
    :param Source source: the source of the given tweets
    :param Datetime now: the current datetime
    :returns: a list of parsed tweets :class:`Tweet` objects
    :rtype: list
    """
    now = datetime.now(timezone.utc) if now is None else now
    document = parse_feed('\n'.join(raw_tweets))
    if not document.bozo:
        # Well-formed feed: build tweets directly from the parsed entries.
        return [
            Tweet(
                click.unstyle(entry.title.strip()) + ' ' + entry.links[0].href,
                parse_iso8601(entry.updated),
                source
            )
            for entry in document.entries
        ]
    # Malformed feed: fall back to parsing line by line, skipping bad lines.
    tweets = []
    for line in raw_tweets:
        try:
            parsed = parse_tweet(line, source, now)
        except (ValueError, OverflowError) as exc:
            logger.debug("{0} - {1}".format(source.url, exc))
        else:
            tweets.append(parsed)
    return tweets
def feed(self) -> Feed:
    """Return the parsed feed, fetching it on first access and caching it.

    When ``save_bandwith`` is set, the stored etag/modified validators are
    sent so the server can answer 304 Not Modified, in which case an empty
    :class:`Feed` is returned (and deliberately not cached).
    """
    if self._feed:
        return self._feed
    config = self.feed_config
    if config.save_bandwith:
        # Conditional fetch: pass validators from the previous download.
        raw = parse_feed(config.source, etag=config.etag, modified=config.modified)
    else:
        raw = parse_feed(config.source)
    # NOTE(review): assumes the parse result always carries `status`
    # (true for HTTP sources) — confirm for non-HTTP `source` values.
    if raw.status == 304:
        return Feed()
    config.etag = raw.etag
    config.modified = raw.modified
    self._feed = Feed.from_feedparser(raw)
    return self._feed
def __call__(self):
    """Poll the feed, remember unseen entries, and return announcement lines.

    Returns a list of strings: a header line plus up to three lines for
    new entries (summaries truncated so a line stays within ~450 chars),
    or an empty list when nothing new was found.
    """
    parsed = parse_feed(self.url)
    if self.title == "" and self.link == "":
        # First successful poll: adopt the feed's own title and link.
        self.title = getattr(parsed.feed, "title", "")
        self.link = getattr(parsed.feed, "link", "")
    fresh = []
    for entry in parsed.entries:
        item = {
            "time": mktime(entry.updated_parsed),
            "title": entry.title,
            "summary": html2text(entry.summary).strip().split("\n")[0],
            "link": entry.links[0].href
        }
        if item not in self.entries:
            self.entries.append(item)
            fresh.append(item)
    if fresh == []:
        return []
    lines = ["RSS: {0:s} ({1:s})".format(self.title, self.link)]
    for item in fresh[:3]:
        total = len(item["title"]) + len(item["summary"]) + len(item["link"])
        if total > 450:
            # Line too long: truncate the summary to fit the budget.
            fixed = len(item["title"]) + len(item["link"])
            lines.append(
                " * {0:s}: {1:s} ... <{2:s}>".format(
                    item["title"],
                    item["summary"][:(450 - fixed)],
                    item["link"]
                )
            )
        else:
            lines.append(
                " * {0:s}: {1:s} <{2:s}>".format(
                    item["title"],
                    item["summary"],
                    item["link"]
                )
            )
    return lines
async def fetch(self) -> FeedParserDict:
    """Download this feed's URL and return the parsed result.

    :returns: the parsed feed as a :class:`FeedParserDict`
    :raises FeedError: when the server answers with a non-200 status or
        the downloaded body cannot be parsed as a feed.
    """
    # Send a User-Agent header only when one was configured.
    headers = ({} if self.user_agent is None else {
        'User-Agent': self.user_agent
    })
    async with ClientSession(headers=headers) as session:
        async with session.get(self.url) as response:
            if response.status != 200:
                raise FeedError(f'Feed {self.name!r}: error sending ' +
                                f'request to {self.url!r}')
            # Read the body while the connection is still open.
            text = await response.text()
            rss = parse_feed(text)
            if rss['bozo']:
                # bozo is set when the parser hit malformed content;
                # chain the parser's own exception as the root cause.
                raise FeedError(
                    f'Feed {self.name!r}: error parsing url {self.url!r}'
                ) from rss['bozo_exception']
            # NOTE(review): logging.info is awaited, so `logging` is
            # presumably an async logger (e.g. aiologger), not the
            # stdlib module — confirm against the file's imports.
            await logging.info(f'Feed {self.name!r}: downloaded url {self.url!r}')
            return rss
def make_site_with_rssfeed_readable_again(url, filename, is_clean):
    """Convert feed to an HTML file.

    Downloads the feed at *url*, strips unwanted markup, renders the
    entries through a Mako template and writes the result to *filename*.

    :param url: feed URL to download
    :param filename: path of the HTML file to write
    :param is_clean: use the "clean" template and readability
        post-processing instead of the "readable" template
    """
    with open(filename, 'w') as file_object:
        print("\nOPENING URL: " + url + "\n\n")
        headers = {
            'User-Agent': APP_BRANDNAME + '/' + APP_RELEASE + ' (Unix; Intel OS Nine 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
        }
        response = requests.get(url, headers=headers)
        mystr = response.text
        # remove heigh and width in images because CSS will do that
        mystr = mystr.replace(u"height=", "whatever=")
        mystr = mystr.replace(u"width=", "whatever=")
        # remove unwanted strings in output
        mystr = mystr.replace(u'<hr id=', '<hr class="spenden" id=')
        mystr = mystr.replace(u"<p><strong>Hilf mit!</strong>", "")
        mystr = mystr.replace(
            u"Mit Deiner finanziellen Hilfe unterstützt Du unabhängigen Journalismus.", "")
        if APP_DEBUG:
            print("FEED:\n" + str(mystr) + "\n****************************")
        feedtitle = None
        # BUGFIX: initialize entries before the try block — previously it
        # was only bound inside `try`, so a parse failure made the later
        # len(entries) call raise NameError.
        entries = []
        try:
            root = parse_feed(mystr)
            entries = root.entries
            # access feedtitle
            feedtitle = root.feed.title
        except Exception as e:
            print("PARSING-ERROR: " + str(e))
            print(traceback.format_exc())
        if not feedtitle:
            feedtitle = DEFAULT_TITLE
        if is_clean:
            template = APP_PATH + '/' + 'template_clean.html'
        else:
            template = APP_PATH + '/' + 'template_readable.html'
        if APP_DEBUG:
            print("\n ENTRIES TO RENDER: " + str(len(entries)) + "\n")
        # BUGFIX: guard against an empty entry list before indexing the
        # last element (previously IndexError on feeds with no entries).
        last_entry_link = entries[-1].link if entries else ""
        html_footer = site_footer_html()
        html_content = Template(filename=template, output_encoding='utf-8').render(
            last_entry_link=last_entry_link,
            num_of_entries=len(entries),
            feedurl=url,
            entries=entries,
            feedtitle=feedtitle,
            footer=html_footer)
        if APP_DEBUG:
            print("HTML:\n" + html_content + "\n****************************")
        if is_clean:
            clean = Document(html_content)
            file_object.write(clean.content())
        else:
            file_object.write(html_content)
async def parse(content):
    """Asynchronously parse raw feed *content* by delegating to parse_feed."""
    result = parse_feed(content)
    return result