Esempio n. 1
0
    def _check_feed(self):
        """Validate stream a bit - failing fast where possible.

           Constructs an equivalent(ish) HEAD request,
           without re-writing feedparser completely.
           (it never times out if reading from a stream - see #2257)

           Returns False only when the response's Content-Type (with any
           parameters such as charset removed) is not found in
           feedparser.ACCEPT_HEADER. Returns True otherwise, including when
           the response exposes no status code. Errors raised while opening
           the request are not caught here."""
        req = feedparser._build_urllib2_request(
            self.uri, feedparser.USER_AGENT, None, None, None, None, {})
        req.method = "HEAD"
        opener = build_opener(feedparser._FeedURLHandler())
        result = None
        try:
            result = opener.open(req)
            ct_hdr = result.headers.get('Content-Type', "Unknown type")
            # Keep only the media type: drop parameters ("; charset=...")
            # and any whitespace that surrounded the separator
            content_type = ct_hdr.split(';')[0].strip()
            try:
                status = result.code if PY2 else result.status
            except AttributeError:
                # Without a status nothing more is checked; fall through
                # and let the caller attempt a parse
                print_w("Missing status code for feed %s" % self.uri)
            else:
                print_d("Pre-check: %s returned %s with content type '%s'" %
                        (self.uri, status, content_type))
                if content_type not in feedparser.ACCEPT_HEADER:
                    print_w("Unusable content: %s. Perhaps %s is not a feed?" %
                            (content_type, self.uri))
                    return False
                # No real need to check HTTP Status - errors are very unlikely
                # to be a usable content type, and we should try to parse
        finally:
            try:
                # Close the response explicitly; closing the opener alone
                # does not release it
                if result is not None:
                    result.close()
            finally:
                opener.close()
        return True
Esempio n. 2
0
    def _check_feed(self):
        """Validate stream a bit - failing fast where possible.

           Constructs an equivalent(ish) HEAD request,
           without re-writing feedparser completely.
           (it never times out if reading from a stream - see #2257)

           Returns False when the response's Content-Type (with any
           parameters such as charset removed) is not found in
           feedparser.ACCEPT_HEADER, and True otherwise. Errors raised
           while opening the request are not caught here."""
        req = feedparser._build_urllib2_request(
            self.uri, feedparser.USER_AGENT, None, None, None, None, {})
        req.method = "HEAD"
        opener = build_opener(feedparser._FeedURLHandler())
        result = None
        try:
            result = opener.open(req)
            ct_hdr = result.headers.get('Content-Type', "Unknown type")
            # Keep only the media type: drop parameters ("; charset=...")
            # and any whitespace that surrounded the separator
            content_type = ct_hdr.split(';')[0].strip()
            status = result.code if PY2 else result.status
            print_d("Pre-check: %s returned %s with content type '%s'" %
                    (self.uri, status, content_type))
            if content_type not in feedparser.ACCEPT_HEADER:
                print_w("Unusable content: %s. Perhaps %s is not a feed?" %
                        (content_type, self.uri))
                return False
            # No real need to check HTTP Status - errors are very unlikely
            # to be a usable content type, and we should always try to parse
        finally:
            try:
                # Close the response explicitly; closing the opener alone
                # does not release it
                if result is not None:
                    result.close()
            finally:
                opener.close()
        return True
Esempio n. 3
0
 def test_urllib_build_opener(self):
     """Open each URL through a freshly built opener.

     Every URL in self.GOOD must open (the response is closed right
     away); every URL in self.BAD must raise an exception.
     """
     for good_url in self.GOOD:
         response = build_opener().open(good_url)
         response.close()
     for bad_url in self.BAD:
         with self.assertRaises(Exception):
             response = build_opener().open(bad_url)
             response.close()
Esempio n. 4
0
 def test_urllib_build_opener(self):
     """Exercise build_opener() against the known good and bad URLs.

     URLs listed in self.GOOD are opened and the response closed;
     URLs listed in self.BAD are expected to raise an exception.
     """
     for ok_url in self.GOOD:
         handle = build_opener().open(ok_url)
         handle.close()
     for failing_url in self.BAD:
         with self.assertRaises(Exception):
             handle = build_opener().open(failing_url)
             handle.close()