def process_output_to_epub(feed: FeedParserDict, limit: int) -> epub.EpubBook:
    """Assemble an EpubBook from the first *limit* feed entries.

    Each entry becomes one XHTML chapter (title + summary + source link);
    images found in the summary are registered on the book, and a minimal
    navigation stylesheet is attached.
    """
    logging.debug("Starting format html for epub file")
    book = epub.EpubBook()
    book.set_identifier('rss news')
    book.set_title(feed.get("feed", {}).get("title"))
    book.set_language('en')
    book.spine = ['nav']
    book.toc = []

    for entry in feed.get("entries")[:limit]:
        entry_title = entry.get("title")
        # File name only has to be unique within this book, so a hash of
        # the title is enough.
        chapter = epub.EpubHtml(
            title=entry_title,
            file_name=str(hash(entry_title)) + '.xhtml')
        markup = (create_title(entry_title)
                  + entry.get("summary")
                  + create_link(entry.get("link")))
        soup = BeautifulSoup(markup, "lxml")
        process_images(soup.find_all('img'), book)
        chapter.set_content(str(soup))
        book.add_item(chapter)
        book.spine.append(chapter)
        book.toc.append(chapter)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content='BODY {color: white;}')
    book.add_item(nav_css)
    logging.debug("Structure for epub file created")
    return book
def _update_feed(
        cls,
        feed_subscription: FeedSubscription,
        feed_data: FeedParserDict,
) -> Feed:
    """
    Create or update Feed based on parsed data.

    :param feed_subscription: FeedSubscription related instance.
    :param feed_data: Parsed RSS data.
    :return: Processed Feed instance.
    """
    cloud = feed_data.feed.get('cloud', {})
    image = feed_data.feed.get('image', {})
    text_input = feed_data.feed.get('textinput', {})
    # Create a field_name:value dict out of fetched data for Feed
    data = {
        'cloud_domain': cloud.get('domain'),
        'cloud_path': cloud.get('path'),
        'cloud_port': cloud.get('port'),
        'cloud_protocol': cloud.get('protocol'),
        'cloud_register_procedure': cloud.get('registerProcedure'),
        'copyright': feed_data.feed.get('rights'),
        'description': feed_data.feed.get('subtitle'),
        'docs': feed_data.feed.get('docs'),
        'encoding': feed_data.get('encoding'),
        'generator': feed_data.feed.get('generator'),
        'image_description': image.get('description'),
        'image_height': image.get('height'),
        # BUG FIX: was image.get('width') — a copy-paste slip that stored
        # the image width in the link field; feedparser exposes the image
        # link under 'link'.
        'image_link': image.get('link'),
        'image_title': image.get('title'),
        'image_url': image.get('href'),
        'image_width': image.get('width'),
        'language': feed_data.feed.get('language'),
        'link': feed_data.feed.get('link'),
        'managing_editor': feed_data.feed.get('author'),
        'pub_date': cls.get_pub_date(feed_data.feed),
        'subscription': feed_subscription,
        'text_input_description': text_input.get('description'),
        'text_input_link': text_input.get('link'),
        'text_input_name': text_input.get('name'),
        'text_input_title': text_input.get('title'),
        'title': feed_data.feed.get('title'),
        'ttl': feed_data.feed.get('ttl'),
        'version': feed_data.get('version'),
        'web_master': feed_data.feed.get('publisher')
    }
    try:
        feed = feed_subscription.feed
        # Update Feed with fetched values
        for name, value in data.items():
            setattr(feed, name, value)
    except FeedSubscription.feed.RelatedObjectDoesNotExist:
        # Make a new Feed instance with fetched values
        feed = Feed(**data)
    feed.save()
    return feed
def process_json(feed: FeedParserDict, limit: int) -> Dict:
    """Build a plain dict view of the feed, ready for JSON serialization."""
    logging.debug("Starting create JSON object...")
    # Only these well-known entry fields are carried over, and only when
    # the entry actually has them.
    wanted = ("title", "author", "link", "published", "summary",
              "comments", "links")
    entries = []
    for item in feed.get("entries")[:limit]:
        entries.append({field: item[field]
                        for field in wanted if field in item})
    logging.debug("JSON created")
    return {"title": feed.get("feed", {}).get("title"), "entries": entries}
def process_formatted_output(feed: FeedParserDict,
                             limit: int) -> List[Optional[str]]:
    """Render the feed title and its first *limit* entries as console lines."""
    logging.debug("Starting format output")
    lines = [format_output.wrap_string(feed.get("feed", {}).get("title"))]
    for entry in feed.get("entries")[:limit]:
        # One block of lines per entry: title, metadata, body, then links.
        lines.append(format_output.format_title(entry.get("title")))
        lines.extend(format_output.format_fields(entry, "author", "link"))
        lines.append(format_output.format_date(entry.get("published")))
        lines.append(format_output.format_summary(entry.get("summary")))
        lines.extend(format_output.format_fields(entry, "comments"))
        lines.extend(format_output.format_links(entry.get("links")))
    logging.debug("Output formatted")
    return lines
def test__update_feed__updates_feed__if_relation_exists(self) -> None:
    """An existing related Feed is updated in place with the parsed title."""
    expected_title = 'test2'
    parsed = FeedParserDict({'feed': {'title': expected_title}})
    FeedUpdater._update_feed(self.feed_subscription, parsed)
    self.feed.refresh_from_db()
    self.assertEqual(self.feed.title, expected_title)
def test__update_feed__creates_feed__if_relation_is_missing(self) -> None:
    """A fresh Feed is created when the subscription has no related Feed."""
    self.feed.delete()
    expected_title = 'test2'
    parsed = FeedParserDict({'feed': {'title': expected_title}})
    FeedUpdater._update_feed(self.feed_subscription, parsed)
    self.assertEqual(self.feed_subscription.feed.title, expected_title)
def test__update_categories__replace_old_categories_with_new(self) -> None:
    """Updating categories drops stale keywords and keeps only parsed tags."""
    FeedCategory.objects.create(feed=self.feed, keyword='old_keyword')
    new_keyword = 'keyword'
    parsed = FeedParserDict({'feed': {'tags': [{'term': 'keyword'}]}})

    FeedUpdater._update_categories(self.feed, parsed)

    categories = FeedCategory.objects.filter(feed=self.feed)
    self.assertEqual(categories.count(), 1)
    self.assertEqual(categories.first().keyword, new_keyword)
def getFeed(url, request_headers=None, handlers=None):
    """Fetch and parse an RSS feed, returning an empty FeedParserDict on failure.

    :param url: feed URL; runs of slashes in the path are collapsed to one.
    :param request_headers: optional HTTP headers passed to the parser.
    :param handlers: optional urllib handlers passed to the parser.
    :return: parsed feed when it has entries, otherwise an empty FeedParserDict.
    """
    parsed = list(urlparse.urlparse(url))
    parsed[2] = re.sub("/{2,}", "/", parsed[2])  # replace two or more / with one
    # BUG FIX: the normalized path was computed but never used — the original
    # code passed the raw url to parse(). Rebuild the URL from the parts.
    url = urlparse.urlunparse(parsed)
    try:
        feed = parse(url, False, False, request_headers, handlers=handlers)
        if feed:
            if 'entries' in feed:
                return feed
            elif 'error' in feed.feed:
                err_code = feed.feed['error']['code']
                err_desc = feed.feed['error']['description']
                logger.log(u'RSS ERROR:[%s] CODE:[%s]' % (err_desc, err_code),
                           logger.DEBUG)
        else:
            logger.log(u'RSS error loading url: ' + url, logger.DEBUG)
    except Exception as e:
        logger.log(u'RSS error: ' + ex(e), logger.DEBUG)
    # Best-effort contract: callers always get a FeedParserDict, never None.
    return FeedParserDict()
def update(cls, feed_subscription_id: int) -> Tuple[Feed, FeedParserDict]:
    """
    Parse feed from RSS page, create/update Feed and related
    instances of FeedCategory.

    :param feed_subscription_id: FeedSubscription id to update RSS.
    :return: Tuple with Feed instance and parsed RSS data.
    :raises Exception: re-raises whatever the fetch/update raised after
        marking the subscription as failed.
    """
    feed_subscription = cls._get_feed_subscription(feed_subscription_id)
    feed_subscription.in_progress()
    try:
        # All-or-nothing: Feed and its categories are committed together.
        with transaction.atomic():
            feed_data = cls._get_feed_data(feed_subscription.url)
            feed = cls._update_feed(feed_subscription, feed_data)
            cls._update_categories(feed, feed_data)
            feed_subscription.success()
    except Exception:
        feed_subscription.failure()
        # Bare raise (was `raise e`) preserves the original traceback.
        raise
    return feed, feed_data.get('entries', FeedParserDict())
# Test fixture: the FeedParserDict produced for a conditional GET where the
# server answered HTTP 304 (Not Modified) — see 'status' and 'debug_message'
# below: 'entries' is empty, 'feed' is empty, and only the cached ETag and
# response headers are populated.
sheldon_feed_dict = FeedParserDict({
    'bozo': False,
    'debug_message': 'The feed has not changed since you last checked, so the '
                     'server sent no data. This is a feature, not a bug!',
    'entries': [],
    'etag': '"a740a10f5d95c83b973395fc75c97714"',
    'feed': {},
    'headers': {
        'age': '441',
        'cache-control': 'public, must-revalidate, proxy-revalidate, '
                         'max-age=900',
        'connection': 'close',
        'date': 'Fri, 24 Jul 2020 17:03:31 GMT',
        'etag': '"a740a10f5d95c83b973395fc75c97714"',
        'expires': 'Fri, 24 Jul 2020 17:00:01 GMT',
        'server': 'AmazonS3',
        'via': '1.1 42ef990e439ae115ff739f04e3945234.cloudfront.net '
               '(CloudFront)',
        'x-amz-cf-id': 'WWkREBUOkcmiydbH3WQNRpviU9VxTkG9RGf5pLkmsTL2EnzLn4l6tA==',
        'x-amz-cf-pop': 'SEA19-C1',
        'x-cache': 'Hit from cloudfront'
    },
    'href': 'http://cdn.sheldoncomics.com/rss.xml',
    'status': 304,
    'version': ''
})
def format_fields(news: FeedParserDict, *fields: str) -> List[Optional[str]]:
    """Format fields of news for output, skipping fields the entry lacks."""
    formatted = []
    for field in fields:
        if field in news:
            formatted.append(add_title(news.get(field), field))
    return formatted