Example #1
0
def validateStream(aFile, firstOccurrenceOnly=0, contentType=None, base=""):
    """Validate a feed read from an already-open file-like object.

    At most MAXDATALENGTH bytes are read from `aFile`. The bytes are passed
    through xmlEncoding.decode (with fallback='utf-8') and the result is
    handed to _validate together with `firstOccurrenceOnly` and `base`.

    When `contentType` is truthy it is checked with mediaTypes.checkValid to
    obtain the media type and charset; otherwise both are None. If a media
    type is known and the validator reports a feed type, the two are compared
    with mediaTypes.checkAgainstFeedType.

    Returns a dict with the keys "feedType" and "loggedEvents", both taken
    from the validator object.

    Raises ValidationFailure (wrapping logging.ValidatorLimit) when the
    stream still has data after MAXDATALENGTH bytes were read.
    """
    events = []

    # Start from "unknown" and only override when a Content-Type was given.
    mediaType = None
    charset = None
    if contentType:
        mediaType, charset = mediaTypes.checkValid(contentType, events)

    rawdata = aFile.read(MAXDATALENGTH)
    # A single extra byte is enough to prove the feed exceeds the cap.
    if aFile.read(1):
        limitInfo = {'limit': 'feed length > ' + str(MAXDATALENGTH) + ' bytes'}
        raise ValidationFailure(logging.ValidatorLimit(limitInfo))

    encoding, rawdata = xmlEncoding.decode(
        mediaType, charset, rawdata, events, fallback='utf-8')

    validator = _validate(
        rawdata, firstOccurrenceOnly, events, base, encoding,
        mediaType=mediaType)

    # Cross-check the declared media type against what was actually parsed.
    if mediaType:
        if validator.feedType:
            mediaTypes.checkAgainstFeedType(
                mediaType, validator.feedType, validator.loggedEvents)

    result = {}
    result["feedType"] = validator.feedType
    result["loggedEvents"] = validator.loggedEvents
    return result
Example #2
0
def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
    """validate RSS from URL, returns events list, or (events, rawdata) tuple

    Fetches `url` with urllib2, sending "Accept-encoding: gzip, deflate" and
    a FeedValidator User-Agent, and reads at most MAXDATALENGTH bytes.

    If the URL that was finally fetched differs from the requested one, the
    original URL is requested again directly over httplib (plain http only);
    any response status other than 301 is recorded as a TempRedirect event.

    Raises ValidationFailure when the body is longer than MAXDATALENGTH
    bytes or when the server answers with a bad HTTP status line.

    NOTE(review): the visible code ends after the BadStatusLine handler. It
    does not use `firstOccurrenceOnly` or `wantRawData` and contains no
    return statement, so the return value described in the summary line is
    not produced by this excerpt.
    """
    loggedEvents = []
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip, deflate")
    request.add_header("User-Agent", "FeedValidator/1.3")
    try:
        usock = urllib2.urlopen(request)
        rawdata = usock.read(MAXDATALENGTH)
        # One byte beyond the cap is enough to know the feed is too long.
        if usock.read(1):
            raise ValidationFailure(
                logging.ValidatorLimit({
                    'limit':
                    'feed length > ' + str(MAXDATALENGTH) + ' bytes'
                }))

        # check for temporary redirects
        if usock.geturl() != request.get_full_url():
            # Parse the URL properly instead of splitting on '/': a URL
            # without a path (e.g. "http://example.com") has no fourth
            # component and used to raise IndexError here.
            from urlparse import urlsplit
            (scheme, netloc, path, query, fragment) = urlsplit(url)
            if scheme == 'http':
                from httplib import HTTPConnection
                # Empty path becomes '/'; the query is kept, the fragment
                # is never sent to the server.
                requestUri = (path or '/') + (query and '?' + query)

                conn = HTTPConnection(netloc)
                try:
                    conn.request("GET", requestUri)
                    resp = conn.getresponse()
                    if resp.status != 301:
                        loggedEvents.append(TempRedirect({}))
                finally:
                    # The probe connection is only needed for its status.
                    conn.close()

    except BadStatusLine as status:
        # Report the exception class as the "status" of the failure.
        raise ValidationFailure(logging.HttpError({'status':
                                                   status.__class__}))
Example #3
0
def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
    """validate RSS from URL, returns events list, or (events, rawdata) tuple

    Fetches `url` with urllib2, sending "Accept-encoding: gzip, deflate" and
    a FeedValidator User-Agent, and reads at most MAXDATALENGTH bytes.
    Events are collected in a local `loggedEvents` list.

    Raises ValidationFailure when:
      * the body is longer than MAXDATALENGTH bytes;
      * the server answers with a bad HTTP status line;
      * the server answers with an HTTP error whose body is at least 512
        bytes, carries no content-encoding header, and is rejected by
        sniffPossibleFeed.

    NOTE(review): this excerpt stops inside the outer `try:`. Its handlers,
    any use of `firstOccurrenceOnly` / `wantRawData`, and the return
    statement are not visible here.
    """
    loggedEvents = []
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip, deflate")
    request.add_header("User-Agent", "FeedValidator/1.3")
    # Bound up front so the name exists even when urlopen() raises;
    # presumably consumed by code after this excerpt -- TODO confirm.
    usock = None
    try:
        try:
            usock = urllib2.urlopen(request)
            rawdata = usock.read(MAXDATALENGTH)
            # One byte beyond the cap is enough to know the feed is too long.
            if usock.read(1):
                raise ValidationFailure(
                    logging.ValidatorLimit({
                        'limit':
                        'feed length > ' + str(MAXDATALENGTH) + ' bytes'
                    }))

            # check for temporary redirects
            # A final URL different from the requested one means a redirect
            # was followed; re-request the original URL to see its status.
            if usock.geturl() <> request.get_full_url():
                from urlparse import urlsplit
                (scheme, netloc, path, query, fragment) = urlsplit(url)
                # Only plain http is probed; other schemes are left alone.
                if scheme == 'http':
                    from httplib import HTTPConnection
                    # Empty path becomes '/'; '?query' is appended only when
                    # a query string is present. The fragment is dropped.
                    requestUri = (path or '/') + (query and '?' + query)

                    conn = HTTPConnection(netloc)
                    conn.request("GET", requestUri)
                    resp = conn.getresponse()
                    # Anything other than 301 is logged as a TempRedirect.
                    if resp.status <> 301:
                        loggedEvents.append(TempRedirect({}))

        except BadStatusLine, status:
            # The exception class (not the instance) is reported as status.
            raise ValidationFailure(
                logging.HttpError({'status': status.__class__}))

        except urllib2.HTTPError, status:
            # The HTTPError object is read like a response to get the body.
            rawdata = status.read()
            # Short or content-encoded error bodies: log the error and carry
            # on, using the error response in place of the stream.
            if len(rawdata) < 512 or 'content-encoding' in status.headers:
                loggedEvents.append(logging.HttpError({'status': status}))
                usock = status
            else:
                # Strip <!-- ... --> comments before sniffing.
                # NOTE(review): no re.DOTALL flag, so '.' does not match
                # newlines and comments spanning several lines are kept.
                rawdata = re.sub('<!--.*?-->', '', rawdata)
                # NOTE(review): `lastline` is not used in the visible code.
                lastline = rawdata.strip().split('\n')[-1].strip()
                if sniffPossibleFeed(rawdata):
                    # Error status, but the body was accepted by
                    # sniffPossibleFeed: log both events and keep going.
                    loggedEvents.append(logging.HttpError({'status': status}))
                    loggedEvents.append(logging.HttpErrorWithPossibleFeed({}))
                    usock = status
                else:
                    raise ValidationFailure(
                        logging.HttpError({'status': status}))
Example #4
0
def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
    """validate RSS from URL, returns events list, or (events, rawdata) tuple"""
    loggedEvents = []
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip, deflate")
    request.add_header("User-Agent", "FeedValidator/1.3")
    usock = None
    try:
        try:
            usock = urllib2.urlopen(request)
            rawdata = usock.read(MAXDATALENGTH)
            if usock.read(1):
                raise ValidationFailure(
                    logging.ValidatorLimit({
                        'limit':
                        'feed length > ' + str(MAXDATALENGTH) + ' bytes'
                    }))

            # check for temporary redirects
            if usock.geturl() <> request.get_full_url():
                from httplib import HTTPConnection
                spliturl = url.split('/', 3)
                if spliturl[0] == "http:":
                    conn = HTTPConnection(spliturl[2])
                    conn.request("GET", '/' + spliturl[3].split("#", 1)[0])
                    resp = conn.getresponse()
                    if resp.status <> 301:
                        loggedEvents.append(TempRedirect({}))

        except BadStatusLine, status:
            raise ValidationFailure(
                logging.HttpError({'status': status.__class__}))

        except urllib2.HTTPError, status:
            rawdata = status.read()
            if len(rawdata) < 512 or 'content-encoding' in status.headers:
                loggedEvents.append(logging.HttpError({'status': status}))
                usock = status
            else:
                rawdata = re.sub('<!--.*?-->', '', rawdata)
                lastline = rawdata.strip().split('\n')[-1].strip()
                if lastline in ['</rss>', '</feed>', '</rdf:RDF>', '</kml>']:
                    loggedEvents.append(logging.HttpError({'status': status}))
                    usock = status
                else:
                    raise ValidationFailure(
                        logging.HttpError({'status': status}))