コード例 #1
0
ファイル: test_views.py プロジェクト: bookieio/bookie_parser
    def test_cached_webpage(self):
        """Requesting /v for a url leaves the parsed page cached in redis."""
        url = 'http://www.google.com/intl/en/about/index.html'
        hashed = generate_hash(url)

        # The view answers with a redirect; following it lands on the
        # actual page.
        redirect = self.app.get('/v', params={'url': url}, status=302)
        redirect.follow()

        from bookie_parser.models import server
        # The raw key must be present in redis.
        cached_raw = server.get(hashed)
        self.assertTrue(cached_raw, 'The key is found.')

        # Load the stored page back through the manager and inspect it.
        data = WebPageMgr.get(hash_id=hashed)

        self.assertEqual(url, data.url, "The url is stored in the root object")
        self.assertEqual(
            hashed, data.hash_id, "The hash is stored in the root object")

        has_request = data.request is not None
        self.assertTrue(has_request, 'The request is stored in the cache.')

        self.assertEqual(u'Google  - About Google', data.title)

        has_readable = data.readable is not None
        self.assertTrue(has_readable)
コード例 #2
0
ファイル: views.py プロジェクト: bookieio/bookie_parser
def api_hash(request):
    """Fetch the data based on the given hash id.

    Reads ``hash_id`` from ``request.matchdict`` and looks the page up via
    ``WebPageMgr``.

    Returns:
        - ``HTTPNotFound()`` when no hash id was supplied, or when the
          manager returns nothing for an id it reported as existing.
        - A dict with an ``error`` key (response status set to 404) when
          ``WebPageMgr.exists`` reports the hash id as unknown.
        - Otherwise a dict with ``data`` (``dict(page)``) and ``readable``
          (``page.readable``).
    """
    # Look up the data from the hash_id
    hash_id = request.matchdict.get('hash_id', None)
    if not hash_id:
        # hash_id may be None here, so it must not be concatenated onto a
        # str; pass it as a lazy logging argument instead.
        LOG.debug('no hash id supplied: %s', hash_id)
        return HTTPNotFound()

    exists = WebPageMgr.exists(hash_id=hash_id)
    if not exists:
        request.response.status_int = 404
        return {
            'error': 'Hash id not found: ' + hash_id,
        }

    # The value returned by exists() is what gets handed to get().
    page = WebPageMgr.get(exists)
    if not page:
        LOG.debug('notfound: %s', hash_id)
        return HTTPNotFound()

    resp = request.response
    environ = request.environ
    resp.headers['Content-Type'] = 'application/json; charset="utf8"'
    # allow cross domain requests: xdr
    # Echo the caller's Origin header back (empty string when absent).
    resp.headers['Access-Control-Allow-Origin'] = environ.get(
        'HTTP_ORIGIN', "")

    return {
        'data': dict(page),
        'readable': page.readable,
    }
コード例 #3
0
ファイル: views.py プロジェクト: bookieio/bookie_parser
def url(request):
    """Look up the stored page for the ``hash_id`` in the route matchdict.

    Returns a dict holding the page under ``webpage`` when the lookup
    yields a truthy result, otherwise an ``HTTPNotFound`` response.
    """
    # Look up the page from the hash_id
    page = WebPageMgr.get(request.matchdict.get('hash_id', None))

    if not page:
        return HTTPNotFound()

    return {'webpage': page}
コード例 #4
0
ファイル: views.py プロジェクト: bookieio/bookie_parser
def readable(request):
    """This is the old api endpoint that returns json data.

    Reads the ``url`` request parameter. If the url is already known to
    ``WebPageMgr`` the stored page is returned; otherwise the url is
    processed with ``ReadableRequest`` and the result is stored via
    ``WebPageMgr.store_request``.

    Returns:
        - ``HTTPNotFound()`` when no ``url`` parameter was supplied.
        - A dict with ``data`` (``dict(page)``) and ``readable``
          (``page.readable``) on success.
        - A dict with an ``error`` key (response status set to 500) when
          processing the url reports an error.
    """
    url = request.params.get('url', None)
    # url may be None at this point; use lazy logging arguments rather than
    # str concatenation so a missing parameter reaches the 404 below
    # instead of raising TypeError.
    LOG.debug('readable process, %s', url)

    if not url:
        LOG.debug('notfound,%s', url)
        return HTTPNotFound()

    url = url.strip('/')
    LOG.debug('Checking url: %s', url)

    request.response.headers['Content-Type'] = \
        'application/json; charset="utf8"'

    # allow cross domain requests: xdr
    request.response.headers['Access-Control-Allow-Origin'] = '*'

    exists = WebPageMgr.exists(url=url)
    if exists:
        # The value returned by exists() is what gets handed to get().
        page = WebPageMgr.get(exists)
        return {
            'data': dict(page),
            'readable': page.readable,
        }

    LOG.debug('Does not Exist: ...fetching')
    read = ReadableRequest(url)
    read.process()

    if read.is_error:
        LOG.error('url_is_error,%s', url)
        request.response.status_int = 500
        error_message = 'There was an error reading the page.'
        return {
            'error': error_message
        }

    page = WebPageMgr.store_request(read)
    return {
        'data': dict(page),
        'readable': page.readable
    }