Esempio n. 1
0
def download():
    """Stream the stored download for the requested url back to the client.

    Looks up the cached entry for the request parameters, fetches the
    entry's 'download_url' (sending the stored 'download_session' value as
    the Cookie header when present) and relays the upstream body together
    with a small set of pass-through headers.

    Returns:
        The module-level ``response`` object with headers set and its body
        set to an iterator over the upstream content.

    Raises:
        HTTPError: 404 if there is no cached entry or it has no
            'download_url'; 400 if the upstream fetch does not return 200.
    """
    browser_type, archive, url = get_params()

    response_key = get_cache_key(archive, browser_type, url)

    result = rc.get(response_key)
    if not result:
        raise HTTPError(status=404, body='Url Not Archived')

    result = json.loads(result)
    if 'download_url' not in result:
        raise HTTPError(status=404, body='Download Not Available')

    headers = {}
    session = result.get('download_session')

    if session:
        headers['Cookie'] = session

    r = requests.get(result['download_url'], headers=headers, stream=True)

    if r.status_code != 200:
        message = 'Invalid Download Result: {0} {1}'.format(
            r.status_code, r.reason)
        # With stream=True the connection stays checked out until the body
        # is consumed or the response is closed, so release it explicitly.
        r.close()
        raise HTTPError(status=400, body=message)

    pass_headers = ('Content-Disposition', 'Content-Length', 'Content-Type')

    for h in pass_headers:
        value = r.headers.get(h)
        # Only forward headers the upstream actually sent; otherwise a
        # missing header would be passed along as a None value.
        if value is not None:
            response.set_header(h, value)

    # iter_content() defaults to 1-byte chunks; use a sensible block size so
    # large downloads are not relayed one byte at a time.
    response.body = r.iter_content(chunk_size=8192)
    return response
Esempio n. 2
0
def download():
    """Relay the stored download for the requested url to the client.

    The cached entry for the request parameters must contain a
    'download_url'; that url is fetched (with the entry's
    'download_session' value sent as the Cookie header, if any) and its
    body is streamed back along with selected upstream headers.

    Returns:
        The module-level ``response`` object, with pass-through headers set
        and its body set to an iterator over the upstream content.

    Raises:
        HTTPError: 404 if there is no cached entry or it has no
            'download_url'; 400 if the upstream fetch does not return 200.
    """
    browser_type, archive, url = get_params()

    response_key = get_cache_key(archive, browser_type, url)

    result = rc.get(response_key)
    if not result:
        raise HTTPError(status=404, body='Url Not Archived')

    result = json.loads(result)
    if 'download_url' not in result:
        raise HTTPError(status=404, body='Download Not Available')

    headers = {}
    session = result.get('download_session')

    if session:
        headers['Cookie'] = session

    r = requests.get(result['download_url'],
                     headers=headers,
                     stream=True)

    if r.status_code != 200:
        message = 'Invalid Download Result: {0} {1}'.format(r.status_code, r.reason)
        # A streamed response holds its connection until it is read or
        # closed; close it here since the body will never be consumed.
        r.close()
        raise HTTPError(status=400, body=message)

    pass_headers = ('Content-Disposition', 'Content-Length', 'Content-Type')

    for h in pass_headers:
        value = r.headers.get(h)
        # Skip headers the upstream did not send instead of forwarding None.
        if value is not None:
            response.set_header(h, value)

    # The default chunk size of iter_content() is a single byte; stream in
    # larger blocks to keep the relay efficient.
    response.body = r.iter_content(chunk_size=8192)
    return response
Esempio n. 3
0
def archive_page():
    """Queue the requested url for archiving if needed and report its status.

    When no cached entry exists for the request, a placeholder entry is
    stored, the request (query params plus 'url' and a sequence 'num') is
    pushed onto the browser's queue, and the call blocks on the wait key for
    up to theconfig['wait_timeout_secs'].

    Returns:
        The parsed cached entry, extended with 'queue_pos' while it is
        still marked as queued or with 'ttl' otherwise; ERROR_RESP when no
        entry can be read.
    """
    browser_type, archive, url = get_params()

    response_key = get_cache_key(archive, browser_type, url)
    wait_key = get_wait_key(archive, browser_type, url)
    queue_key = get_queue_key(browser_type)

    if not rc.exists(response_key):
        job = dict(request.query)
        num = rc.incr('total_urls:' + browser_type)
        job.update(url=url, num=num)
        job_json = json.dumps(job)

        # Store the placeholder and enqueue the job through one pipeline.
        with pipeline(rc) as pi:
            placeholder = json.dumps({'archived': False,
                                      'queued': True,
                                      'num': num})
            pi.set(response_key, placeholder)
            pi.rpush(queue_key, job_json)

        # Block until something is pushed on the wait key or the timeout hits.
        rc.blpop(wait_key, theconfig['wait_timeout_secs'])

    raw = rc.get(response_key)
    if not raw:
        return ERROR_RESP

    info = json.loads(raw)

    if 'queued' not in info:
        info['ttl'] = rc.ttl(response_key)
        return info

    info['queue_pos'] = 0
    head = rc.lindex(queue_key, 0)
    if head:
        head_num = json.loads(head).get('num', 0)
        # queue_pos == 1: this url is next up.
        # queue_pos <= 0: already removed from the queue and being processed.
        info['queue_pos'] = info['num'] - head_num + 1

    return info
Esempio n. 4
0
def archive_page():
    """Return the archive status for the requested url, enqueueing it first
    when there is no cached entry yet.

    A new request gets a sequence number from the 'total_urls:<browser>'
    counter, a placeholder cache entry, and a command pushed on the
    browser's queue; the call then blocks on the wait key for up to
    theconfig['wait_timeout_secs'].

    Returns:
        The parsed cache entry with 'queue_pos' added (entry still queued)
        or 'ttl' added (otherwise), or ERROR_RESP if no entry is readable.
    """
    browser_type, archive, url = get_params()

    cache_key = get_cache_key(archive, browser_type, url)
    wait_key = get_wait_key(archive, browser_type, url)
    queue_key = get_queue_key(browser_type)

    already_known = rc.exists(cache_key)
    if not already_known:
        command = dict(request.query)
        command['url'] = url

        seq = rc.incr('total_urls:' + browser_type)
        command['num'] = seq
        serialized = json.dumps(command)

        with pipeline(rc) as pi:
            pending = {'archived': False, 'queued': True, 'num': seq}
            pi.set(cache_key, json.dumps(pending))
            pi.rpush(queue_key, serialized)

        timeout = theconfig['wait_timeout_secs']
        rc.blpop(wait_key, timeout)

    cached = rc.get(cache_key)
    if not cached:
        return ERROR_RESP

    status = json.loads(cached)

    if 'queued' in status:
        status['queue_pos'] = 0
        first = rc.lindex(queue_key, 0)
        if first:
            first_num = json.loads(first).get('num', 0)
            # A position of 1 means this url is next up; a position <= 0
            # means it has left the queue and is being processed.
            status['queue_pos'] = status['num'] - first_num + 1
    else:
        status['ttl'] = rc.ttl(cache_key)

    return status