Example #1
0
def get(url, user_agent, user=None):
    """
    Return a Revision of a url.
    Check for the presence of a URL
     At its original location on the web.
     In the DHT.
     In the database.
    """
    revision = Revision()

    if user and not user.can("retrieve_resource"):
        revision.status   = 403
        revision.mimetype = "text"
        revision.content  = "This user account isn't allowed to retrieve resources."
        return revision

    # Ignore for now if the addr is on our LAN, VLAN or localhost.
    url    = urlparse.urlparse(url)
    domain = url.netloc
    path   = url.path or '/'

    try:    host = socket.gethostbyname(domain)
    except: host = ''

    if host:
        log(host)

    # Deep assumptions about the future of ipv4 here.
    if any([host.startswith(subnet) for subnet in local_subnets]):
        revision.status   = 403
        revision.mimetype = "text"
        revision.content  = "We're not currently proxying to local subnets."
        return revision

    # Check the web
    response = None
    try:
        log("Fetching %s from the original domain." % url.geturl())
        response = requests.get(url.geturl(), headers={'User-Agent': user_agent}, 
            timeout=app.config['HTTP_TIMEOUT'])
    except Exception, e:
        log("Error retrieving %s: %s" % (url.geturl(), e.message))