Example #1
0
def process_output_to_epub(feed: FeedParserDict, limit: int) -> epub.EpubBook:
    """Build an EPUB book containing one chapter per feed entry.

    Each chapter is made of the entry title, its summary and its link, in
    that order. Every ``<img>`` tag found in the chapter markup is handed to
    ``process_images`` together with the book before the chapter content is
    stored.

    :param feed: Parsed feed; ``feed["feed"]["title"]`` becomes the book title
        and ``feed["entries"]`` supplies the chapters.
    :param limit: Maximum number of entries to include (used as a slice bound).
    :return: The assembled book with NCX, navigation and a nav stylesheet.
    """
    logging.debug("Starting format html for epub file")
    book = epub.EpubBook()
    book.set_identifier('rss news')
    book.set_title(feed.get("feed", {}).get("title"))
    book.set_language('en')

    book.spine = ['nav']
    book.toc = []
    # A feed without an "entries" key produces an empty book instead of a
    # TypeError from slicing None.
    entries = feed.get("entries") or []
    for news in entries[:limit]:
        title = news.get("title")
        # NOTE(review): hash() of a str is salted per interpreter process, so
        # these file names differ between runs, and entries sharing a title
        # map to the same name - confirm this is acceptable.
        chapter = epub.EpubHtml(title=title,
                                file_name=str(hash(title)) + '.xhtml')
        # Entries without a summary contribute an empty body rather than
        # breaking the concatenation.
        markup = (create_title(title) + (news.get("summary") or "") +
                  create_link(news.get("link")))
        content = BeautifulSoup(markup, "lxml")
        process_images(content.find_all('img'), book)
        chapter.set_content(str(content))
        book.add_item(chapter)
        book.spine.append(chapter)
        book.toc.append(chapter)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    style = 'BODY {color: white;}'
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)
    book.add_item(nav_css)
    logging.debug("Structure for epub file created")
    return book
Example #2
0
    def _update_feed(
        cls,
        feed_subscription: FeedSubscription,
        feed_data: FeedParserDict,
    ) -> Feed:
        """
        Create or update Feed based on parsed data.

        The Feed related to ``feed_subscription`` is updated in place when it
        exists; otherwise a new Feed is built from the same values. In both
        cases the instance is saved before being returned.

        :param feed_subscription: FeedSubscription related instance.
        :param feed_data: Parsed RSS data.
        :return: Processed Feed instance.
        """
        channel = feed_data.feed
        cloud = channel.get('cloud', {})
        image = channel.get('image', {})
        text_input = channel.get('textinput', {})
        # Create a field_name:value dict out of fetched data for Feed
        data = {
            'cloud_domain': cloud.get('domain'),
            'cloud_path': cloud.get('path'),
            'cloud_port': cloud.get('port'),
            'cloud_protocol': cloud.get('protocol'),
            'cloud_register_procedure': cloud.get('registerProcedure'),
            'copyright': channel.get('rights'),
            'description': channel.get('subtitle'),
            'docs': channel.get('docs'),
            'encoding': feed_data.get('encoding'),
            'generator': channel.get('generator'),
            'image_description': image.get('description'),
            'image_height': image.get('height'),
            # Previously read image['width'], which stored the width in the
            # link field.
            'image_link': image.get('link'),
            'image_title': image.get('title'),
            'image_url': image.get('href'),
            'image_width': image.get('width'),
            'language': channel.get('language'),
            'link': channel.get('link'),
            'managing_editor': channel.get('author'),
            'pub_date': cls.get_pub_date(channel),
            'subscription': feed_subscription,
            'text_input_description': text_input.get('description'),
            'text_input_link': text_input.get('link'),
            'text_input_name': text_input.get('name'),
            'text_input_title': text_input.get('title'),
            'title': channel.get('title'),
            'ttl': channel.get('ttl'),
            'version': feed_data.get('version'),
            'web_master': channel.get('publisher')
        }

        try:
            feed = feed_subscription.feed

            # Update Feed with fetched values
            for name, value in data.items():
                setattr(feed, name, value)
        except FeedSubscription.feed.RelatedObjectDoesNotExist:
            # Make a new Feed instance with fetched values
            feed = Feed(**data)

        feed.save()
        return feed
def process_json(feed: FeedParserDict, limit: int) -> Dict:
    """Build a JSON-serialisable summary of the feed.

    :param feed: Parsed feed; ``feed["feed"]["title"]`` is copied to the
        result and ``feed["entries"]`` supplies the entries.
    :param limit: Maximum number of entries to include (used as a slice bound).
    :return: ``{"title": ..., "entries": [...]}`` where each entry keeps only
        the whitelisted keys it actually has.
    """
    logging.debug("Starting create JSON object...")
    keys = [
        "title", "author", "link", "published", "summary", "comments", "links"
    ]
    # A feed without an "entries" key yields an empty list instead of a
    # TypeError from slicing None.
    entries = feed.get("entries") or []
    result = {
        "title": feed.get("feed", {}).get("title"),
        "entries": [{key: news[key]
                     for key in keys if key in news}
                    for news in entries[:limit]],
    }
    logging.debug("JSON created")
    return result
def process_formatted_output(feed: FeedParserDict,
                             limit: int) -> List[Optional[str]]:
    """Build the console representation of the feed.

    The first item is the wrapped feed title. Each entry then contributes
    its title, the "author" and "link" fields, the published date, the
    summary, the "comments" field and its links, in that order.

    :param feed: Parsed feed with ``feed`` and ``entries`` keys.
    :param limit: Maximum number of entries to include (used as a slice bound).
    :return: List of formatted output items produced by ``format_output``.
    """
    logging.debug("Starting format output")
    result = [format_output.wrap_string(feed.get("feed", {}).get("title"))]
    # A feed without an "entries" key prints only the title instead of
    # raising a TypeError from slicing None.
    entries = feed.get("entries") or []
    for news in entries[:limit]:
        result.append(format_output.format_title(news.get("title")))
        result.extend(format_output.format_fields(news, "author", "link"))
        result.append(format_output.format_date(news.get("published")))
        result.append(format_output.format_summary(news.get("summary")))
        result.extend(format_output.format_fields(news, "comments"))
        result.extend(format_output.format_links(news.get("links")))
    logging.debug("Output formatted")
    return result
Example #5
0
    def test__update_feed__updates_feed__if_relation_exists(self) -> None:
        """The Feed held by the test case takes its title from parsed data."""
        expected_title = 'test2'
        parsed = FeedParserDict({'feed': {'title': expected_title}})

        FeedUpdater._update_feed(self.feed_subscription, parsed)

        # Reload so the assertion sees what was actually persisted.
        self.feed.refresh_from_db()
        self.assertEqual(self.feed.title, expected_title)
Example #6
0
    def test__update_feed__creates_feed__if_relation_is_missing(self) -> None:
        """After the Feed is deleted, updating attaches one with the new title."""
        expected_title = 'test2'
        self.feed.delete()
        parsed = FeedParserDict({'feed': {'title': expected_title}})

        FeedUpdater._update_feed(self.feed_subscription, parsed)

        attached_feed = self.feed_subscription.feed
        self.assertEqual(attached_feed.title, expected_title)
Example #7
0
    def test__update_categories__replace_old_categories_with_new(self) -> None:
        """Only the category from the parsed tags remains for the feed."""
        FeedCategory.objects.create(feed=self.feed, keyword='old_keyword')
        new_keyword = 'keyword'
        tags = [{'term': new_keyword}]
        feed_data = FeedParserDict({'feed': {'tags': tags}})

        FeedUpdater._update_categories(self.feed, feed_data)

        categories = FeedCategory.objects.filter(feed=self.feed)
        self.assertEqual(categories.count(), 1)
        self.assertEqual(categories.first().keyword, new_keyword)
Example #8
0
def getFeed(url, request_headers=None, handlers=None):
    """
    Fetch and parse an RSS feed, returning an empty result on failure.

    Runs of two or more slashes in the URL path are collapsed to a single
    slash before the feed is requested. Parse failures and exceptions are
    logged at DEBUG level and never propagate from the fetch itself.

    :param url: Feed URL.
    :param request_headers: Forwarded to ``parse``.
    :param handlers: Forwarded to ``parse`` as ``handlers``.
    :return: The parsed feed when it contains an ``entries`` key, otherwise
        an empty ``FeedParserDict``.
    """
    parsed = list(urlparse.urlparse(url))
    parsed[2] = re.sub(r"/{2,}", "/", parsed[2])  # replace two or more / with one
    # The normalized path used to be computed and then discarded; rebuild the
    # URL so the cleaned form is what actually gets fetched.
    url = urlparse.urlunparse(parsed)

    try:
        feed = parse(url, False, False, request_headers, handlers=handlers)

        if feed:
            if 'entries' in feed:
                return feed
            elif 'error' in feed.feed:
                err_code = feed.feed['error']['code']
                err_desc = feed.feed['error']['description']
                logger.log(u'RSS ERROR:[%s] CODE:[%s]' % (err_desc, err_code), logger.DEBUG)
        else:
            logger.log(u'RSS error loading url: ' + url, logger.DEBUG)

    except Exception as e:
        logger.log(u'RSS error: ' + ex(e), logger.DEBUG)

    return FeedParserDict()
Example #9
0
    def update(cls, feed_subscription_id: int) -> Tuple[Feed, FeedParserDict]:
        """
        Parse feed from RSS page, create/update Feed and related instances of
        FeedCategory.

        The subscription is marked in progress first. Fetching, the Feed
        update and the category update run inside one atomic transaction,
        after which the subscription is marked successful. On any exception
        the subscription is marked as failed and the exception is re-raised.

        :param feed_subscription_id: FeedSubscription id to update RSS.
        :return: Tuple with Feed instance and the ``entries`` value of the
            parsed RSS data (an empty FeedParserDict when the key is absent).
        """
        feed_subscription = cls._get_feed_subscription(feed_subscription_id)
        feed_subscription.in_progress()

        try:
            with transaction.atomic():
                feed_data = cls._get_feed_data(feed_subscription.url)
                feed = cls._update_feed(feed_subscription, feed_data)
                cls._update_categories(feed, feed_data)
                feed_subscription.success()
        except Exception:
            feed_subscription.failure()
            # Bare raise re-raises the active exception with its original
            # traceback untouched.
            raise

        return feed, feed_data.get('entries', FeedParserDict())
Example #10
0
# Canned parse result with HTTP status 304: no entries, an empty ``feed``
# mapping and the response headers that accompanied it.
sheldon_feed_dict = FeedParserDict({
    'bozo': False,
    'debug_message': (
        'The feed has not changed since you last checked, so the '
        'server sent no data.  This is a feature, not a bug!'
    ),
    'entries': [],
    'etag': '"a740a10f5d95c83b973395fc75c97714"',
    'feed': {},
    'headers': {
        'age': '441',
        'cache-control': (
            'public, must-revalidate, proxy-revalidate, '
            'max-age=900'
        ),
        'connection': 'close',
        'date': 'Fri, 24 Jul 2020 17:03:31 GMT',
        'etag': '"a740a10f5d95c83b973395fc75c97714"',
        'expires': 'Fri, 24 Jul 2020 17:00:01 GMT',
        'server': 'AmazonS3',
        'via': (
            '1.1 42ef990e439ae115ff739f04e3945234.cloudfront.net '
            '(CloudFront)'
        ),
        'x-amz-cf-id': (
            'WWkREBUOkcmiydbH3WQNRpviU9VxTkG9RGf5pLkmsTL2EnzLn4l6tA=='
        ),
        'x-amz-cf-pop': 'SEA19-C1',
        'x-cache': 'Hit from cloudfront',
    },
    'href': 'http://cdn.sheldoncomics.com/rss.xml',
    'status': 304,
    'version': '',
})
Example #11
0
def format_fields(news: FeedParserDict, *fields: str) -> List[Optional[str]]:
    """Return the titled form of each requested field present in *news*.

    Fields are handled in the order given; names that *news* does not
    contain are skipped, so the result may be shorter than *fields*.
    """
    formatted = []
    for name in fields:
        if name not in news:
            continue
        formatted.append(add_title(news.get(name), name))
    return formatted