Example no. 1
    def get_rssfeed_parsed(self, rssfeed_data, site_cookies_dict=None, user_agent=None):
        """
        rssfeed_data: A dictionary containing rss feed data as stored in the YaRSS2 config.
        site_cookies_dict: A dictionary of cookie values to be used for this rssfeed.
        """
        return_dict = {}
        rssfeeds_dict = {}
        cookie_header = {}
        return_dict["user_agent"] = user_agent

        if site_cookies_dict:
            cookie_header = http.get_cookie_header(site_cookies_dict)
            return_dict["cookie_header"] = cookie_header

        self.log.info("Fetching RSS Feed: '%s' with Cookie: '%s' and User-agent: '%s'." %
                      (rssfeed_data["name"], http.get_cookie_header(cookie_header), user_agent))

        # Will abort after 10 seconds if server doesn't answer
        try:
            parsed_feed = feedparser.parse(rssfeed_data["url"], request_headers=cookie_header,
                                           agent=user_agent,
                                           timeout=10)
        except Exception as e:
            self.log.warn("Exception occurred in feedparser: " + str(e))
            self.log.warn("Feedparser was called with url: '%s' using cookies: '%s' and User-agent: '%s'" %
                          (rssfeed_data["url"], cookie_header, user_agent))
            self.log.warn("Stacktrace:\n" + common.get_exception_string())
            return None
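# Illustrative usage sketch, not from the YaRSS2 sources: "handler" stands for an
# instance of the class defining get_rssfeed_parsed above, and the feed and cookie
# values are made up; only the "name"/"url" keys and the method signature come
# from the code itself.
def fetch_example(handler):
    rssfeed_data = {"name": "Example feed", "url": "http://example.com/rss"}
    cookies = {"uid": "12345", "pass": "abcdef"}
    parsed = handler.get_rssfeed_parsed(rssfeed_data, site_cookies_dict=cookies,
                                        user_agent="Deluge/YaRSS2")
    if parsed is None:
        # get_rssfeed_parsed returns None when feedparser raised an exception
        return None
    return parsed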
    def test_get_link(self):
        file_url = yarss2.util.common.get_resource(common.testdata_rssfeed_filename, path="tests/")
        from yarss2.lib.feedparser import feedparser
        parsed_feed = feedparser.parse(file_url)
        item = None
        for e in parsed_feed["items"]:
            item = e
            break
        # Item has enclosure, so it should use that link
        self.assertEquals(self.rssfeedhandler.get_link(item), item.enclosures[0]["href"])
        del item["links"][:]
        # Item no longer has enclosures, so it should return the regular link
        self.assertEquals(self.rssfeedhandler.get_link(item), item["link"])
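    # Minimal sketch of the behaviour test_get_link exercises above. This is an
    # assumption drawn from the assertions, not the actual rssfeedhandler
    # implementation: prefer the first enclosure's href when the item has
    # enclosures, otherwise fall back to the item's regular link.
    def _get_link_sketch(self, item):
        if item.get("enclosures"):
            return item["enclosures"][0]["href"]
        return item.get("link")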
    def test_feedparser_ampersand_in_url(self):
        """A bug in feedparser resulted in the URL containing '&amp;' when the XML parser was not available.
        This test disables the XML parser and verifies that the URL is correct.
        """
        file_path = common.get_resource("rss_with_ampersand_link.rss", path="tests")
        # This is the link in rss_with_ampersand_link.rss
        expected = "http://hostname.com/Fetch?hash=2f21d4e59&digest=865178f9bc"
        # Disable XML Parser
        feedparser._XML_AVAILABLE = 0
        parsed_feeds = feedparser.parse(file_path)

        for item in parsed_feeds['items']:
            self.assertEquals(expected, item["link"])
            break
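    # For context, an assumption about rss_with_ampersand_link.rss (the file itself
    # is not shown here): in the RSS XML the link is XML-escaped, e.g.
    #   <link>http://hostname.com/Fetch?hash=2f21d4e59&amp;digest=865178f9bc</link>
    # and feedparser is expected to unescape it to the plain '&' form asserted above.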
    def test_feedparser_dates(self):
        import time
        file_url = yarss2.util.common.get_resource("rss_with_special_dates.rss", path="tests/data/")
        from yarss2.lib.feedparser import feedparser
        parsed_feed = feedparser.parse(file_url)

        for item in parsed_feed['items']:
            # Some RSS feeds do not have a proper timestamp
            if 'published_parsed' in item:
                published_parsed = item['published_parsed']
                test_val = time.struct_time((2014, 4, 10, 3, 44, 14, 3, 100, -1))
                self.assertEquals(test_val, published_parsed)
                break
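    # Illustrative helper, not part of the original tests: feedparser exposes
    # 'published_parsed' as a time.struct_time in UTC, so it can be formatted with
    # the standard time module. For the struct_time asserted above this returns
    # "2014-04-10 03:44:14".
    def _format_published_sketch(self, published_parsed):
        import time
        return time.strftime("%Y-%m-%d %H:%M:%S", published_parsed)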
    def test_get_size(self):
        file_url = yarss2.util.common.get_resource("t1.rss", path="tests/data/feeds/")
        from yarss2.lib.feedparser import feedparser
        parsed_feed = feedparser.parse(file_url)

        size = self.rssfeedhandler.get_size(parsed_feed["items"][0])
        self.assertEquals(len(size), 1)
        self.assertEquals(size[0], (4541927915.52, u'4.23 GB'))

        size = self.rssfeedhandler.get_size(parsed_feed["items"][1])
        self.assertEquals(len(size), 1)
        self.assertEquals(size[0], (402349096.96, u'383.71 MB'))

        size = self.rssfeedhandler.get_size(parsed_feed["items"][2])
        self.assertEquals(len(size), 1)
        self.assertEquals(size[0], (857007476))

        size = self.rssfeedhandler.get_size(parsed_feed["items"][3])
        self.assertEquals(len(size), 2)
        self.assertEquals(size[0], (14353107637))
        self.assertEquals(size[1], (13529146982.4, u'12.6 GB'))
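    # Side note (illustrative, not part of the original tests): the expected byte
    # counts in test_get_size above are the human-readable sizes converted with
    # binary (1024-based) units:
    #   4.23 GB   -> 4.23   * 1024 ** 3 == 4541927915.52
    #   383.71 MB -> 383.71 * 1024 ** 2 == 402349096.96
    #   12.6 GB   -> 12.6   * 1024 ** 3 == 13529146982.4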
    def get_rssfeed_parsed(self, rssfeed_data, site_cookies_dict=None):
        """
        rssfeed_data: A dictionary containing rss feed data as stored in the YaRSS2 config.
        site_cookies_dict: A dictionary of cookie values to be used for this rssfeed.
        """
        return_dict = {}
        rssfeeds_dict = {}
        cookie_header = {}

        if site_cookies_dict:
            cookie_header = http.get_cookie_header(site_cookies_dict)
            return_dict["cookie_header"] = cookie_header

        self.log.info("Fetching RSS Feed: '%s' with Cookie: '%s'." % (rssfeed_data["name"], cookie_header))

        # Will abort after 10 seconds if server doesn't answer
        try:
            parsed_feed = feedparser.parse(rssfeed_data["url"], request_headers=cookie_header, agent=self.agent, timeout=10)
        except Exception as e:
            self.log.warn("Exception occurred in feedparser: " + str(e))
            self.log.warn("Feedparser was called with url: '%s' and header: '%s'" % (rssfeed_data["url"], cookie_header))
            self.log.warn("Stacktrace:\n" + common.get_exception_string())
            return None