コード例 #1
0
ファイル: __init__.py プロジェクト: lugrus2000/feedvalidator
def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
    """validate RSS from URL, returns events list, or (events, rawdata) tuple

    Fetches ``url`` with urllib2, reading at most MAXDATALENGTH bytes, and
    collects non-fatal findings in ``loggedEvents``.  Fatal conditions are
    raised as ValidationFailure wrapping a logging event.

    NOTE(review): only the fetch and HTTP-error handling are visible in this
    excerpt.  How ``firstOccurrenceOnly`` / ``wantRawData`` are used and what
    is actually returned is not shown here -- confirm against the full source.
    """
    loggedEvents = []
    # Build the request; advertise compressed transfer and identify ourselves.
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip, deflate")
    request.add_header("User-Agent", "FeedValidator/1.3")
    # Bound up front so the name exists even when urlopen() raises; the
    # HTTPError handler below rebinds it to the error response.
    usock = None
    # NOTE(review): the handlers/cleanup of this outer try are outside the
    # visible excerpt.
    try:
        try:
            usock = urllib2.urlopen(request)
            # Read up to the size limit; if even one more byte is available
            # the feed is larger than MAXDATALENGTH and validation is aborted.
            rawdata = usock.read(MAXDATALENGTH)
            if usock.read(1):
                raise ValidationFailure(
                    logging.ValidatorLimit({
                        'limit':
                        'feed length > ' + str(MAXDATALENGTH) + ' bytes'
                    }))

            # check for temporary redirects
            # urlopen() follows redirects, so a final URL that differs from
            # the requested one means a redirect happened.  Re-request the
            # original URL over a plain HTTPConnection to see the first
            # response's status code.
            if usock.geturl() <> request.get_full_url():
                from urlparse import urlsplit
                (scheme, netloc, path, query, fragment) = urlsplit(url)
                # Only plain http is probed; other schemes are skipped.
                if scheme == 'http':
                    from httplib import HTTPConnection
                    # Path defaults to '/', the query string is appended only
                    # when non-empty, and the fragment is not sent.
                    requestUri = (path or '/') + (query and '?' + query)

                    conn = HTTPConnection(netloc)
                    conn.request("GET", requestUri)
                    resp = conn.getresponse()
                    # Anything other than 301 is logged as a temporary
                    # redirect.
                    if resp.status <> 301:
                        loggedEvents.append(TempRedirect({}))

        # The status line could not be parsed: report the exception class as
        # the status and abort.
        except BadStatusLine, status:
            raise ValidationFailure(
                logging.HttpError({'status': status.__class__}))

        # The server answered with an HTTP error status; the error object
        # doubles as the response (it has read() and headers).
        except urllib2.HTTPError, status:
            rawdata = status.read()
            # Short bodies, or bodies with a Content-Encoding header, are not
            # sniffed: log the HTTP error and carry on with the error response
            # standing in for usock.
            if len(rawdata) < 512 or 'content-encoding' in status.headers:
                loggedEvents.append(logging.HttpError({'status': status}))
                usock = status
            else:
                # Drop XML comments before sniffing.  The pattern has no
                # DOTALL flag, so comments spanning several lines are kept.
                rawdata = re.sub('<!--.*?-->', '', rawdata)
                # NOTE(review): lastline is not used in the visible portion of
                # this function.
                lastline = rawdata.strip().split('\n')[-1].strip()
                # If the error body still looks like a feed, log both events
                # and continue with the error response; otherwise the HTTP
                # error is fatal.
                if sniffPossibleFeed(rawdata):
                    loggedEvents.append(logging.HttpError({'status': status}))
                    loggedEvents.append(logging.HttpErrorWithPossibleFeed({}))
                    usock = status
                else:
                    raise ValidationFailure(
                        logging.HttpError({'status': status}))
コード例 #2
0
def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
    """validate RSS from URL, returns events list, or (events, rawdata) tuple

    Fetches ``url`` with urllib2, reading at most MAXDATALENGTH bytes, and
    collects non-fatal findings in ``loggedEvents``.  Fatal conditions are
    raised as ValidationFailure wrapping a logging event.

    NOTE(review): only the fetch and redirect check are visible in this
    excerpt.  How ``firstOccurrenceOnly`` / ``wantRawData`` are used and what
    is actually returned is not shown here -- confirm against the full source.
    """
    loggedEvents = []
    # Build the request; advertise compressed transfer and identify ourselves.
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip, deflate")
    request.add_header("User-Agent", "FeedValidator/1.3")
    try:
        usock = urllib2.urlopen(request)
        # Read up to the size limit; if even one more byte is available the
        # feed is larger than MAXDATALENGTH and validation is aborted.
        rawdata = usock.read(MAXDATALENGTH)
        if usock.read(1):
            raise ValidationFailure(
                logging.ValidatorLimit({
                    'limit':
                    'feed length > ' + str(MAXDATALENGTH) + ' bytes'
                }))

        # check for temporary redirects
        # urlopen() follows redirects, so a final URL that differs from the
        # requested one means a redirect happened.  Re-request the original
        # URL over a plain HTTPConnection to see the first response's status.
        if usock.geturl() <> request.get_full_url():
            from httplib import HTTPConnection
            # "http://host/path" -> ['http:', '', 'host', 'path'].
            spliturl = url.split('/', 3)
            # Only plain http is probed; other schemes are skipped.
            if spliturl[0] == "http:":
                conn = HTTPConnection(spliturl[2])
                # Send the path (and query) with any '#fragment' removed.
                # NOTE(review): a URL without a path ("http://host") splits
                # into only three parts, so spliturl[3] raises IndexError.
                conn.request("GET", '/' + spliturl[3].split("#", 1)[0])
                resp = conn.getresponse()
                # Anything other than 301 is logged as a temporary redirect.
                if resp.status <> 301:
                    loggedEvents.append(TempRedirect({}))

    # The status line could not be parsed: report the exception class as the
    # status and abort.
    except BadStatusLine, status:
        raise ValidationFailure(logging.HttpError({'status':
                                                   status.__class__}))
コード例 #3
0
ファイル: __init__.py プロジェクト: point97/madrona
def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
    """validate RSS from URL, returns events list, or (events, rawdata) tuple

    Fetches ``url`` with urllib2, reading at most MAXDATALENGTH bytes, and
    collects non-fatal findings in ``loggedEvents``.  Fatal conditions are
    raised as ValidationFailure wrapping a logging event.

    NOTE(review): only the fetch and HTTP-error handling are visible in this
    excerpt.  How ``firstOccurrenceOnly`` / ``wantRawData`` are used and what
    is actually returned is not shown here -- confirm against the full source.
    """
    loggedEvents = []
    # Build the request; advertise compressed transfer and identify ourselves.
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip, deflate")
    request.add_header("User-Agent", "FeedValidator/1.3")
    # Bound up front so the name exists even when urlopen() raises; the
    # HTTPError handler below rebinds it to the error response.
    usock = None
    # NOTE(review): the handlers/cleanup of this outer try are outside the
    # visible excerpt.
    try:
        try:
            usock = urllib2.urlopen(request)
            # Read up to the size limit; if even one more byte is available
            # the feed is larger than MAXDATALENGTH and validation is aborted.
            rawdata = usock.read(MAXDATALENGTH)
            if usock.read(1):
                raise ValidationFailure(
                    logging.ValidatorLimit({
                        'limit':
                        'feed length > ' + str(MAXDATALENGTH) + ' bytes'
                    }))

            # check for temporary redirects
            # urlopen() follows redirects, so a final URL that differs from
            # the requested one means a redirect happened.  Re-request the
            # original URL over a plain HTTPConnection to see the first
            # response's status code.
            if usock.geturl() <> request.get_full_url():
                from httplib import HTTPConnection
                # "http://host/path" -> ['http:', '', 'host', 'path'].
                spliturl = url.split('/', 3)
                # Only plain http is probed; other schemes are skipped.
                if spliturl[0] == "http:":
                    conn = HTTPConnection(spliturl[2])
                    # Send the path (and query) with any '#fragment' removed.
                    # NOTE(review): a URL without a path ("http://host")
                    # splits into only three parts, so spliturl[3] raises
                    # IndexError.
                    conn.request("GET", '/' + spliturl[3].split("#", 1)[0])
                    resp = conn.getresponse()
                    # Anything other than 301 is logged as a temporary
                    # redirect.
                    if resp.status <> 301:
                        loggedEvents.append(TempRedirect({}))

        # The status line could not be parsed: report the exception class as
        # the status and abort.
        except BadStatusLine, status:
            raise ValidationFailure(
                logging.HttpError({'status': status.__class__}))

        # The server answered with an HTTP error status; the error object
        # doubles as the response (it has read() and headers).
        except urllib2.HTTPError, status:
            rawdata = status.read()
            # Short bodies, or bodies with a Content-Encoding header, are not
            # inspected: log the HTTP error and carry on with the error
            # response standing in for usock.
            if len(rawdata) < 512 or 'content-encoding' in status.headers:
                loggedEvents.append(logging.HttpError({'status': status}))
                usock = status
            else:
                # Drop XML comments before looking at the last line.  The
                # pattern has no DOTALL flag, so comments spanning several
                # lines are kept.
                rawdata = re.sub('<!--.*?-->', '', rawdata)
                lastline = rawdata.strip().split('\n')[-1].strip()
                # A body whose last line is a feed/KML closing tag is treated
                # as a feed served with an error status: log the error and
                # continue.  Anything else makes the HTTP error fatal.
                if lastline in ['</rss>', '</feed>', '</rdf:RDF>', '</kml>']:
                    loggedEvents.append(logging.HttpError({'status': status}))
                    usock = status
                else:
                    raise ValidationFailure(
                        logging.HttpError({'status': status}))
コード例 #4
0
ファイル: __init__.py プロジェクト: melody40/monorepo
         if resp.status<>301:
           loggedEvents.append(TempRedirect({}))
 
   except BadStatusLine, status:
     raise ValidationFailure(logging.HttpError({'status': status.__class__}))
 
   except urllib2.HTTPError, status:
     rawdata = status.read()
     lastline = rawdata.strip().split('\n')[-1].strip()
     if lastline in ['</rss>','</feed>','</rdf:RDF>']:
       loggedEvents.append(logging.HttpError({'status': status}))
       usock = status
     else:
       raise ValidationFailure(logging.HttpError({'status': status}))
   except urllib2.URLError, x:
     raise ValidationFailure(logging.HttpError({'status': x.reason}))
   except Timeout, x:
     raise ValidationFailure(logging.IOError({"message": 'Server timed out', "exception":x}))
 
   if usock.headers.get('content-encoding', None) == None:
     loggedEvents.append(Uncompressed({}))
 
   if usock.headers.get('content-encoding', None) == 'gzip':
     import gzip, StringIO
     try:
       rawdata = gzip.GzipFile(fileobj=StringIO.StringIO(rawdata)).read()
     except:
       import sys
       exctype, value = sys.exc_info()[:2]
       event=logging.IOError({"message": 'Server response declares Content-Encoding: gzip', "exception":value})
       raise ValidationFailure(event)
コード例 #5
0
            if spliturl[0] == "http:":
                conn = HTTPConnection(spliturl[2])
                conn.request("GET", '/' + spliturl[3].split("#", 1)[0])
                resp = conn.getresponse()
                if resp.status <> 301:
                    loggedEvents.append(TempRedirect({}))

    except BadStatusLine, status:
        raise ValidationFailure(logging.HttpError({'status':
                                                   status.__class__}))

    except urllib2.HTTPError, status:
        rawdata = status.read()
        lastline = rawdata.strip().split('\n')[-1].strip()
        if lastline in ['</rss>', '</feed>', '</rdf:RDF>']:
            loggedEvents.append(logging.HttpError({'status': status}))
            usock = status
        else:
            raise ValidationFailure(logging.HttpError({'status': status}))
    except urllib2.URLError, x:
        raise ValidationFailure(logging.HttpError({'status': x.reason}))
    except Timeout, x:
        raise ValidationFailure(
            logging.IOError({
                "message": 'Server timed out',
                "exception": x
            }))

    if usock.headers.get('content-encoding', None) == None:
        loggedEvents.append(Uncompressed({}))