def validateStream(aFile, firstOccurrenceOnly=0, contentType=None, base=""):
    loggedEvents = []

    if contentType:
        (mediaType, charset) = mediaTypes.checkValid(contentType, loggedEvents)
    else:
        (mediaType, charset) = (None, None)

    rawdata = aFile.read(MAXDATALENGTH)
    if aFile.read(1):
        raise ValidationFailure(logging.ValidatorLimit(
            {'limit': 'feed length > ' + str(MAXDATALENGTH) + ' bytes'}))

    encoding, rawdata = xmlEncoding.decode(mediaType, charset, rawdata,
                                           loggedEvents, fallback='utf-8')

    validator = _validate(rawdata, firstOccurrenceOnly, loggedEvents, base,
                          encoding, mediaType=mediaType)

    if mediaType and validator.feedType:
        mediaTypes.checkAgainstFeedType(mediaType, validator.feedType,
                                        validator.loggedEvents)

    return {"feedType": validator.feedType,
            "loggedEvents": validator.loggedEvents}

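# Usage sketch (illustrative addition, not part of the original module): one way
# validateStream() might be called on a local file. The 'feed.xml' path and the
# Atom content type below are hypothetical placeholders.
def _exampleValidateStream():
    result = validateStream(open('feed.xml'), firstOccurrenceOnly=1,
                            contentType='application/atom+xml')
    for event in result['loggedEvents']:
        print event  # each logged event describes one validation issue
    return result['feedType']
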
def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
    """validate RSS from URL, returns events list, or (events, rawdata) tuple"""
    loggedEvents = []
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip, deflate")
    request.add_header("User-Agent", "FeedValidator/1.3")
    try:
        usock = urllib2.urlopen(request)
        rawdata = usock.read(MAXDATALENGTH)
        if usock.read(1):
            raise ValidationFailure(logging.ValidatorLimit(
                {'limit': 'feed length > ' + str(MAXDATALENGTH) + ' bytes'}))

        # check for temporary redirects
        if usock.geturl() != request.get_full_url():
            from httplib import HTTPConnection
            spliturl = url.split('/', 3)
            if spliturl[0] == "http:":
                conn = HTTPConnection(spliturl[2])
                conn.request("GET", '/' + spliturl[3].split("#", 1)[0])
                resp = conn.getresponse()
                if resp.status != 301:
                    loggedEvents.append(TempRedirect({}))
    except BadStatusLine, status:
        raise ValidationFailure(logging.HttpError({'status': status.__class__}))

def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
    """validate RSS from URL, returns events list, or (events, rawdata) tuple"""
    loggedEvents = []
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip, deflate")
    request.add_header("User-Agent", "FeedValidator/1.3")
    usock = None
    try:
        try:
            usock = urllib2.urlopen(request)
            rawdata = usock.read(MAXDATALENGTH)
            if usock.read(1):
                raise ValidationFailure(logging.ValidatorLimit(
                    {'limit': 'feed length > ' + str(MAXDATALENGTH) + ' bytes'}))

            # check for temporary redirects
            if usock.geturl() != request.get_full_url():
                from urlparse import urlsplit
                (scheme, netloc, path, query, fragment) = urlsplit(url)
                if scheme == 'http':
                    from httplib import HTTPConnection
                    requestUri = (path or '/') + (query and '?' + query)
                    conn = HTTPConnection(netloc)
                    conn.request("GET", requestUri)
                    resp = conn.getresponse()
                    if resp.status != 301:
                        loggedEvents.append(TempRedirect({}))
        except BadStatusLine, status:
            raise ValidationFailure(
                logging.HttpError({'status': status.__class__}))
    except urllib2.HTTPError, status:
        # a failed request may still carry a feed body worth validating
        rawdata = status.read()
        if len(rawdata) < 512 or 'content-encoding' in status.headers:
            loggedEvents.append(logging.HttpError({'status': status}))
            usock = status
        else:
            # strip comments, then sniff whether the error page is actually a feed
            rawdata = re.sub('<!--.*?-->', '', rawdata)
            lastline = rawdata.strip().split('\n')[-1].strip()
            if sniffPossibleFeed(rawdata):
                loggedEvents.append(logging.HttpError({'status': status}))
                loggedEvents.append(logging.HttpErrorWithPossibleFeed({}))
                usock = status
            else:
                raise ValidationFailure(logging.HttpError({'status': status}))

def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
    """validate RSS from URL, returns events list, or (events, rawdata) tuple"""
    loggedEvents = []
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip, deflate")
    request.add_header("User-Agent", "FeedValidator/1.3")
    usock = None
    try:
        try:
            usock = urllib2.urlopen(request)
            rawdata = usock.read(MAXDATALENGTH)
            if usock.read(1):
                raise ValidationFailure(logging.ValidatorLimit(
                    {'limit': 'feed length > ' + str(MAXDATALENGTH) + ' bytes'}))

            # check for temporary redirects
            if usock.geturl() != request.get_full_url():
                from httplib import HTTPConnection
                spliturl = url.split('/', 3)
                if spliturl[0] == "http:":
                    conn = HTTPConnection(spliturl[2])
                    conn.request("GET", '/' + spliturl[3].split("#", 1)[0])
                    resp = conn.getresponse()
                    if resp.status != 301:
                        loggedEvents.append(TempRedirect({}))
        except BadStatusLine, status:
            raise ValidationFailure(
                logging.HttpError({'status': status.__class__}))
    except urllib2.HTTPError, status:
        # a failed request may still carry a feed body worth validating
        rawdata = status.read()
        if len(rawdata) < 512 or 'content-encoding' in status.headers:
            loggedEvents.append(logging.HttpError({'status': status}))
            usock = status
        else:
            # strip comments and check whether the body ends with a known feed closing tag
            rawdata = re.sub('<!--.*?-->', '', rawdata)
            lastline = rawdata.strip().split('\n')[-1].strip()
            if lastline in ['</rss>', '</feed>', '</rdf:RDF>', '</kml>']:
                loggedEvents.append(logging.HttpError({'status': status}))
                usock = status
            else:
                raise ValidationFailure(logging.HttpError({'status': status}))

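# Usage sketch (illustrative addition, not part of the original module): a minimal
# caller for validateURL(). Per its docstring the call yields the validation
# events (or an (events, rawdata) tuple when wantRawData is set); the URL below
# is a hypothetical placeholder.
def _exampleValidateURL():
    events = validateURL('http://example.com/feed.xml', firstOccurrenceOnly=1)
    for event in events:
        print event
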