def _forbidden(revision, content):
    """Mark `revision` as a plain-text 403 response carrying `content` and return it."""
    revision.status = 403
    revision.mimetype = "text"
    revision.content = content
    return revision


def get(url, user_agent, user=None):
    """
    Return a Revision of a url.

    Check for the presence of a URL:
      1. At its original location on the web.
      2. In the DHT.
      3. In the database.

    Arguments:
      url        -- the URL to retrieve, as a string.
      user_agent -- value sent as the User-Agent header of the outgoing request.
      user       -- optional account object; when given it must pass
                    user.can("retrieve_resource") or a 403 Revision is returned.

    A 403 Revision is also returned when the URL's host resolves to an
    address that starts with one of the prefixes in `local_subnets`.
    """
    revision = Revision()

    if user and not user.can("retrieve_resource"):
        return _forbidden(
            revision,
            "This user account isn't allowed to retrieve resources.",
        )

    # Ignore for now if the addr is on our LAN, VLAN or localhost.
    # NOTE: from here on `url` is the parsed result, not the original string.
    url = urlparse.urlparse(url)
    domain = url.netloc
    path = url.path or '/'

    # Resolve the bare hostname rather than the netloc: the netloc still
    # carries any ":port" / "user:pass@" part, which makes resolution fail
    # and would silently skip the local-subnet check below
    # (e.g. "http://127.0.0.1:8080/").
    try:
        host = socket.gethostbyname(url.hostname or domain)
    except Exception:
        # Best effort: an unresolvable name just skips the subnet check.
        host = ''

    if host:
        log(host)
        # Deep assumptions about the future of ipv4 here.
        if any(host.startswith(subnet) for subnet in local_subnets):
            return _forbidden(
                revision,
                "We're not currently proxying to local subnets.",
            )

    # Check the web
    # NOTE(review): requests resolves the name again and follows redirects by
    # default, so the subnet check above does not cover redirect targets --
    # confirm whether that matters for this deployment.
    response = None
    try:
        log("Fetching %s from the original domain." % url.geturl())
        response = requests.get(
            url.geturl(),
            headers={'User-Agent': user_agent},
            timeout=app.config['HTTP_TIMEOUT'],
        )
    except Exception as e:
        # Format the exception itself: `e.message` is deprecated and is empty
        # for exceptions constructed with more than one argument.
        log("Error retrieving %s: %s" % (url.geturl(), e))