def download():
    """Proxy a previously recorded download back to the client.

    Looks up the cached result for the requested (archive, browser, url)
    triple, fetches the ``download_url`` stored in it (sending the stored
    ``download_session`` value as a ``Cookie`` header when present) and
    streams the upstream body through the framework ``response`` object.

    Returns:
        The module-level ``response`` object, with selected upstream headers
        copied over and its body set to an iterator over the upstream content.

    Raises:
        HTTPError: 404 when nothing is cached for the key or the cached
            entry has no ``download_url``; 400 when the upstream fetch does
            not answer with status 200.
    """
    browser_type, archive, url = get_params()

    response_key = get_cache_key(archive, browser_type, url)

    result = rc.get(response_key)
    if not result:
        raise HTTPError(status=404, body='Url Not Archived')

    result = json.loads(result)
    if 'download_url' not in result:
        raise HTTPError(status=404, body='Download Not Available')

    headers = {}
    session = result.get('download_session')
    if session:
        headers['Cookie'] = session

    # NOTE(review): no timeout is passed, so a stalled upstream can block
    # this handler indefinitely -- consider adding one.
    r = requests.get(result['download_url'], headers=headers, stream=True)

    if r.status_code != 200:
        status_code, reason = r.status_code, r.reason
        # With stream=True the connection stays checked out until the body
        # is consumed or the response is closed; release it before bailing.
        r.close()
        raise HTTPError(status=400,
                        body='Invalid Download Result: {0} {1}'.format(
                            status_code, reason))

    pass_headers = ('Content-Disposition', 'Content-Length', 'Content-Type')
    for h in pass_headers:
        value = r.headers.get(h)
        # Only forward headers the upstream actually sent; setting a header
        # from ``None`` would emit a bogus value to the client.
        if value is not None:
            response.set_header(h, value)

    # iter_content() defaults to chunk_size=1 (one byte per chunk); use a
    # sane buffer size so the download is not streamed byte-by-byte.
    response.body = r.iter_content(chunk_size=8192)
    return response
def download():
    """Proxy a previously recorded download back to the client.

    Looks up the cached result for the requested (archive, browser, url)
    triple, fetches the ``download_url`` stored in it (sending the stored
    ``download_session`` value as a ``Cookie`` header when present) and
    streams the upstream body through the framework ``response`` object.

    Returns:
        The module-level ``response`` object, with selected upstream headers
        copied over and its body set to an iterator over the upstream content.

    Raises:
        HTTPError: 404 when nothing is cached for the key or the cached
            entry has no ``download_url``; 400 when the upstream fetch does
            not answer with status 200.
    """
    browser_type, archive, url = get_params()

    response_key = get_cache_key(archive, browser_type, url)

    result = rc.get(response_key)
    if not result:
        raise HTTPError(status=404, body='Url Not Archived')

    result = json.loads(result)
    if 'download_url' not in result:
        raise HTTPError(status=404, body='Download Not Available')

    headers = {}
    session = result.get('download_session')
    if session:
        headers['Cookie'] = session

    # NOTE(review): no timeout is passed, so a stalled upstream can block
    # this handler indefinitely -- consider adding one.
    r = requests.get(result['download_url'], headers=headers, stream=True)

    if r.status_code != 200:
        status_code, reason = r.status_code, r.reason
        # With stream=True the connection stays checked out until the body
        # is consumed or the response is closed; release it before bailing.
        r.close()
        raise HTTPError(status=400,
                        body='Invalid Download Result: {0} {1}'.format(
                            status_code, reason))

    pass_headers = ('Content-Disposition', 'Content-Length', 'Content-Type')
    for h in pass_headers:
        value = r.headers.get(h)
        # Only forward headers the upstream actually sent; setting a header
        # from ``None`` would emit a bogus value to the client.
        if value is not None:
            response.set_header(h, value)

    # iter_content() defaults to chunk_size=1 (one byte per chunk); use a
    # sane buffer size so the download is not streamed byte-by-byte.
    response.body = r.iter_content(chunk_size=8192)
    return response
def archive_page():
    """Queue the requested url for archiving (if needed) and report status.

    When no cached response exists for the (archive, browser, url) key, a
    placeholder entry marked as queued is stored, the request is pushed onto
    the browser's queue, and the handler blocks on the wait key for up to
    ``theconfig['wait_timeout_secs']``.

    Returns:
        The decoded cached response. While it is still marked ``queued`` it
        carries a ``queue_pos`` field; otherwise it carries the ``ttl`` of
        the cache key. ``ERROR_RESP`` is returned when nothing is cached.
    """
    browser_type, archive, url = get_params()

    response_key = get_cache_key(archive, browser_type, url)
    wait_key = get_wait_key(archive, browser_type, url)
    queue_key = get_queue_key(browser_type)

    if not rc.exists(response_key):
        job = dict(request.query)
        job['url'] = url

        # Per-browser running counter; used below to derive queue position.
        num = rc.incr('total_urls:' + browser_type)
        job['num'] = num

        job_json = json.dumps(job)
        placeholder = json.dumps({'archived': False,
                                  'queued': True,
                                  'num': num})

        with pipeline(rc) as pi:
            pi.set(response_key, placeholder)
            pi.rpush(queue_key, job_json)

        # Block until something is pushed to the wait key or we time out.
        rc.blpop(wait_key, theconfig['wait_timeout_secs'])

    cached = rc.get(response_key)
    if not cached:
        return ERROR_RESP

    result = json.loads(cached)

    if 'queued' not in result:
        result['ttl'] = rc.ttl(response_key)
        return result

    result['queue_pos'] = 0
    head = rc.lindex(queue_key, 0)
    if head:
        head_num = json.loads(head).get('num', 0)

        # pos == 1 implies this url is next up
        # pos <= 0 implies this url was removed from queue and is being
        # processed
        result['queue_pos'] = result['num'] - head_num + 1

    return result