예제 #1
0
def api_hash(request):
    """Fetch the data based on the given hash id.

    The hash id is read from ``request.matchdict['hash_id']``.

    Returns:
        - ``HTTPNotFound()`` when no hash id is supplied, or when
          ``WebPageMgr.get`` yields a falsy page.
        - A dict with an ``'error'`` key (response status set to 404) when
          ``WebPageMgr.exists`` reports nothing for the hash id.
        - Otherwise a dict with the page under ``'data'`` (as a plain dict)
          and its ``readable`` attribute under ``'readable'``.
    """
    # Look up the data from the hash_id
    hash_id = request.matchdict.get('hash_id', None)
    if not hash_id:
        # hash_id may be None here, so it cannot be concatenated directly;
        # repr() keeps the log line informative without raising TypeError.
        LOG.debug('no hash id supplied: ' + repr(hash_id))
        return HTTPNotFound()

    exists = WebPageMgr.exists(hash_id=hash_id)
    if not exists:
        request.response.status_int = 404
        return {
            'error': 'Hash id not found: ' + hash_id,
        }

    # The truthy value returned by exists() is what get() is keyed on.
    page = WebPageMgr.get(exists)
    if not page:
        LOG.debug('notfound: ' + hash_id)
        return HTTPNotFound()

    resp = request.response
    environ = request.environ
    resp.headers['Content-Type'] = 'application/json; charset="utf8"'
    # allow cross domain requests: xdr
    # Echo the caller's Origin header back (empty string when absent).
    resp.headers['Access-Control-Allow-Origin'] = environ.get(
        'HTTP_ORIGIN', "")

    return {
        'data': dict(page),
        'readable': page.readable,
    }
예제 #2
0
    def test_cached_webpage(self):
        """A readable parse of a url leaves the parsed page cached in redis."""
        target_url = 'http://www.google.com/intl/en/about/index.html'
        expected_hash = generate_hash(target_url)

        # The view endpoint must answer with a redirect for the url ...
        response = self.app.get('/v', params={'url': target_url}, status=302)

        # ... and following that redirect lands on the actual page.
        response = response.follow()

        from bookie_parser.models import server
        # The raw key has to be present in redis.
        self.assertTrue(server.get(expected_hash), 'The key is found.')

        # Load what was stored through the manager and inspect its fields.
        cached = WebPageMgr.get(hash_id=expected_hash)

        self.assertEqual(
            target_url,
            cached.url,
            "The url is stored in the root object")
        self.assertEqual(
            expected_hash,
            cached.hash_id,
            "The hash is stored in the root object")
        self.assertTrue(
            cached.request is not None,
            'The request is stored in the cache.')
        self.assertEqual(u'Google  - About Google', cached.title)
        self.assertTrue(cached.readable is not None)
예제 #3
0
def readable(request):
    """This is the old api endpoint that returns json data.

    The target is read from the ``url`` request parameter.

    Returns:
        - ``HTTPNotFound()`` when no url is supplied.
        - A dict with ``'data'`` and ``'readable'`` keys when the page is
          already known to ``WebPageMgr`` or was fetched successfully.
        - A dict with an ``'error'`` key (response status set to 500) when
          the ``ReadableRequest`` reports an error.
    """
    url = request.params.get('url', None)

    # Validate before any string concatenation: url is None when the
    # parameter is missing, and 'text' + None raises TypeError.
    if not url:
        LOG.debug('notfound,' + repr(url))
        return HTTPNotFound()

    LOG.debug('readable process, ' + url)

    url = url.strip('/')
    LOG.debug('Checking url: ' + url)

    request.response.headers['Content-Type'] = \
        'application/json; charset="utf8"'

    # allow cross domain requests: xdr
    request.response.headers['Access-Control-Allow-Origin'] = '*'

    exists = WebPageMgr.exists(url=url)
    if exists:
        # The truthy value returned by exists() is what get() is keyed on.
        page = WebPageMgr.get(exists)
        return {
            'data': dict(page),
            'readable': page.readable,
        }

    LOG.debug('Does not Exist: ...fetching')
    read = ReadableRequest(url)
    read.process()

    if read.is_error:
        LOG.error('url_is_error,' + url)
        request.response.status_int = 500
        error_message = 'There was an error reading the page.'
        return {
            'error': error_message
        }

    page = WebPageMgr.store_request(read)
    return {
        'data': dict(page),
        'readable': page.readable
    }
예제 #4
0
def view(request):
    """This is the 'usable' endpoint that displays the trimmed content for
    reading.

    The target is read from the ``url`` request parameter.

    Returns:
        - ``HTTPNotFound()`` when no url is supplied.
        - ``HTTPFound`` redirecting to the ``'url'`` route for the page's
          hash id, when the page is already known or was fetched
          successfully.
        - The rendered ``error.mako`` template when the ``ReadableRequest``
          reports an error.
    """
    # fetch download of the url
    url = request.params.get('url', None)

    # Validate before any string concatenation: url is None when the
    # parameter is missing, and 'text' + None raises TypeError.
    if not url:
        LOG.debug('notfound,' + repr(url))
        return HTTPNotFound()

    LOG.debug('process, ' + url)

    url = url.strip('/')
    LOG.debug('Checking url: ' + url)

    exists = WebPageMgr.exists(url=url)
    if exists:
        LOG.debug('Exists: ...forwarding')
        # The truthy value from exists() is used as the hash id of the route.
        return HTTPFound(
            location=request.route_url('url', hash_id=exists))

    LOG.debug('Does not Exist: ...fetching')
    read = ReadableRequest(url)
    read.process()

    if not read.is_error:
        LOG.warning('writing it out')
        page = WebPageMgr.store_request(read)
        return HTTPFound(
            location=request.route_url('url', hash_id=page.hash_id))

    LOG.error('url_is_error,' + url)
    return render_to_response(
        'error.mako', {
            'error_message': 'There was an error fetching the url.',
            'error_details': {
                'code': read.status_code,
            },
            'readable': read,
            'title': 'Processing Error',
        },
        request=request
    )
예제 #5
0
def api_parse(request):
    """Api to parse a url POST'd

    The url is taken from the ``url`` request parameter or, failing that,
    from the ``url`` key of a JSON request body.

    Returns:
        - A dict with an ``'error'`` key (response status set to 404) when
          no url can be found in either place.
        - The result of ``api_hash`` for the page's hash id when the page is
          already known or was fetched successfully.
        - A dict with an ``'error'`` key (response status set to 500) when
          the ``ReadableRequest`` reports an error.
    """
    url = request.params.get('url', None)

    resp = request.response
    environ = request.environ
    # Echo the caller's Origin header back (empty string when absent).
    resp.headers['Access-Control-Allow-Origin'] = environ.get(
        'HTTP_ORIGIN', '')

    if not url:
        # json_body raises when the body is missing or is not valid JSON;
        # treat that the same as "no url supplied" rather than erroring out.
        try:
            params = request.json_body
        except ValueError:
            params = None
        LOG.debug(params)

        # Valid JSON is not necessarily an object (could be a list, etc.).
        url = params.get('url', None) if isinstance(params, dict) else None

        if not url:
            request.response.status_int = 404
            return {
                'error': 'No url supplied.',
            }

    LOG.debug('api process, ' + url)

    url = url.strip('/')
    LOG.debug('Checking url: ' + url)
    exists = WebPageMgr.exists(url=url)
    if exists:
        LOG.debug('Exists: ...forwarding')
        # Hand off to api_hash by planting the hash id where it looks for it.
        request.matchdict['hash_id'] = exists
        return api_hash(request)

    LOG.debug('Does not Exist: ...fetching')
    read = ReadableRequest(url)
    read.process()

    if read.is_error:
        LOG.error('url_is_error,' + url)
        request.response.status_int = 500
        return {
            'error': 'There was an error fetching content.',
        }

    page = WebPageMgr.store_request(read)
    request.matchdict['hash_id'] = page.hash_id
    return api_hash(request)
예제 #6
0
def url(request):
    """Look up the stored page for the ``hash_id`` route match.

    Returns a dict carrying the page under ``'webpage'`` when the lookup
    yields a truthy result; otherwise an ``HTTPNotFound`` response.
    """
    # Resolve the page straight from the hash id in the matched route.
    stored = WebPageMgr.get(request.matchdict.get('hash_id', None))

    if not stored:
        return HTTPNotFound()

    return {
        'webpage': stored,
    }