def test_cached_webpage(self):
    """A readable parse of a url leaves the parsed page cached in redis."""
    page_url = 'http://www.google.com/intl/en/about/index.html'
    page_hash = generate_hash(page_url)

    # The view answers with a 302; following the redirect lands on the
    # actual page.
    self.app.get('/v', params={'url': page_url}, status=302).follow()

    from bookie_parser.models import server

    # The raw key must be present in redis.
    self.assertTrue(server.get(page_hash), 'The key is found.')

    # Load the stored record back and check each cached field in turn.
    cached = WebPageMgr.get(hash_id=page_hash)
    self.assertEqual(
        page_url, cached.url, "The url is stored in the root object")
    self.assertEqual(
        page_hash, cached.hash_id, "The hash is stored in the root object")
    self.assertTrue(
        cached.request is not None, 'The request is stored in the cache.')
    self.assertEqual(u'Google - About Google', cached.title)
    self.assertTrue(cached.readable is not None)
def api_hash(request):
    """Fetch the data based on the given hash id.

    The hash id is read from ``request.matchdict['hash_id']``.

    Returns:
        - ``HTTPNotFound()`` when no hash id was supplied, or when
          ``WebPageMgr.get`` yields nothing for it.
        - ``{'error': ...}`` with the response status set to 404 when
          ``WebPageMgr.exists`` reports that the hash id is unknown.
        - ``{'data': dict(page), 'readable': page.readable}`` on success,
          with JSON content-type and cross-origin headers set on the
          response.
    """
    hash_id = request.matchdict.get('hash_id', None)
    if not hash_id:
        # hash_id is None (or empty) on this branch. Concatenating it onto
        # the message raised TypeError for None and masked the intended 404,
        # so hand it to the logger as an argument instead.
        LOG.debug('no hash id supplied: %r', hash_id)
        return HTTPNotFound()

    exists = WebPageMgr.exists(hash_id=hash_id)
    if not exists:
        request.response.status_int = 404
        return {
            'error': 'Hash id not found: ' + hash_id,
        }

    # NOTE(review): the truthy result of exists() is passed straight to
    # get(); presumably it is the stored hash id -- confirm in WebPageMgr.
    page = WebPageMgr.get(exists)
    if not page:
        LOG.debug('notfound: ' + hash_id)
        return HTTPNotFound()

    resp = request.response
    environ = request.environ
    resp.headers['Content-Type'] = 'application/json; charset="utf8"'

    # allow cross domain requests: xdr
    # Echo the caller's Origin header back, or an empty string if absent.
    resp.headers['Access-Control-Allow-Origin'] = environ.get(
        'HTTP_ORIGIN', "")

    return {
        'data': dict(page),
        'readable': page.readable,
    }
def url(request):
    """Look up the stored webpage for the hash id found in the route.

    Returns ``{'webpage': page}`` when ``WebPageMgr.get`` finds a page for
    ``request.matchdict['hash_id']``; otherwise returns ``HTTPNotFound()``.
    """
    # A missing hash_id is passed through as None, leaving the outcome to
    # WebPageMgr.get.
    found = WebPageMgr.get(request.matchdict.get('hash_id', None))
    if not found:
        return HTTPNotFound()
    return {'webpage': found}
def readable(request):
    """This is the old api endpoint that returns json data.

    Reads the ``url`` request parameter, strips leading/trailing slashes
    from it, and returns the readable content for that url. A page already
    known to ``WebPageMgr`` is returned directly; otherwise the url is
    processed through ``ReadableRequest`` and the result is stored.

    Returns:
        - ``HTTPNotFound()`` when no ``url`` parameter was supplied.
        - ``{'data': dict(page), 'readable': page.readable}`` on success.
        - ``{'error': ...}`` with the response status set to 500 when
          processing the url reports an error.
    """
    url = request.params.get('url', None)

    # url is None when the parameter is missing. Concatenating it onto the
    # message raised TypeError before the guard below could return the 404,
    # so pass it to the logger as an argument instead. For string urls the
    # emitted text is unchanged.
    LOG.debug('readable process, %s', url)
    if not url:
        LOG.debug('notfound,%s', url)
        return HTTPNotFound()

    url = url.strip('/')
    LOG.debug('Checking url: %s', url)

    request.response.headers['Content-Type'] = \
        'application/json; charset="utf8"'

    # allow cross domain requests: xdr
    request.response.headers['Access-Control-Allow-Origin'] = '*'

    exists = WebPageMgr.exists(url=url)
    if exists:
        # NOTE(review): the truthy result of exists() is passed straight to
        # get(); presumably it is the stored hash id -- confirm.
        page = WebPageMgr.get(exists)
        return {
            'data': dict(page),
            'readable': page.readable,
        }

    LOG.debug('Does not Exist: ...fetching')
    read = ReadableRequest(url)
    read.process()

    if read.is_error:
        LOG.error('url_is_error,%s', url)
        request.response.status_int = 500
        error_message = 'There was an error reading the page.'
        return {
            'error': error_message,
        }

    page = WebPageMgr.store_request(read)
    return {
        'data': dict(page),
        'readable': page.readable,
    }