def start(cdxjFilePath=INDEX_FILE, proxy=None):
    """Prepare the replay configuration and run the web application.

    Stores ``proxy`` on ``app``, makes sure a replay host/port configuration
    exists, registers ``cdxjFilePath`` as the replay index when the IPFS
    daemon responds, and finally calls ``app.run``.

    :param cdxjFilePath: path of the CDXJ index to replay; when it equals
        ``INDEX_FILE`` the first-run setup (``ipwbConfig.firstRun``) is
        triggered as well.
    :param proxy: value stored unchanged on ``app.proxy``.
    """
    replayConfig = ipwbConfig.getIPWBReplayConfig()
    app.proxy = proxy

    if not replayConfig:
        # No stored configuration yet: write the defaults and read them back.
        ipwbConfig.setIPWBReplayConfig(IPWBREPLAY_IP, IPWBREPLAY_PORT)
        replayConfig = ipwbConfig.getIPWBReplayConfig()

    if not ipwbConfig.isDaemonAlive():
        for notice in ('Sample data not pulled from IPFS.',
                       'Check that the IPFS daemon is running.'):
            print(notice)
    else:
        if cdxjFilePath == INDEX_FILE:
            ipwbConfig.firstRun()
        ipwbConfig.setIPWBReplayIndexPath(cdxjFilePath)
        app.cdxjFilePath = cdxjFilePath

    endpoint = (IPWBREPLAY_IP, IPWBREPLAY_PORT)
    try:
        print('IPWB replay started on http://{0}:{1}'.format(*endpoint))
        # Listens on every interface, whatever address was announced above.
        app.run(host='0.0.0.0', port=IPWBREPLAY_PORT)
    except gaierror:
        print('Detected no active Internet connection.')
        print('Overriding to use default IP and port configuration.')
        app.run()
    except socketerror:
        print('Address {0}:{1} already in use!'.format(*endpoint))
        sys.exit()
def showAdmin():
    """Render ``admin.html`` with daemon status and placeholder statistics.

    The index and summary figures are literal placeholder strings; only the
    status section (version, IPFS endpoint, daemon liveness) and the index
    path are looked up at call time.
    """
    ipfsEndpoint = '{0}:{1}'.format(IPFSAPI_HOST, IPFSAPI_PORT)

    status = dict(
        ipwbVersion=ipwbVersion,
        ipfsEndpoint=ipfsEndpoint,
        ipfsAlive=ipwbUtils.isDaemonAlive(ipfsEndpoint),
    )

    # TODO: Calculate actual URI-R/M counts
    replayIndex = dict(
        path=ipwbUtils.getIPWBReplayIndexPath(),
        enabled=True,
        urimCount='#URI-M',
        urirCount='#URI-R',
    )
    indexes = [replayIndex]

    # TODO: Calculate actual values
    summary = dict(
        urimCount='#URI-M',
        urirCount='#URI-R',
        htmlCount='#HTML',
        earliest='Date1',
        latest='Date2',
    )

    return render_template(
        'admin.html', status=status, indexes=indexes, summary=summary)
def start(cdxjFilePath, proxy=None):
    """Prepare the replay configuration, load the index and run the app.

    Stores ``proxy`` on ``app``, makes sure a replay host/port configuration
    exists, registers ``cdxjFilePath`` as the replay index when the IPFS
    daemon responds, reads the index contents onto ``app`` and finally
    calls ``app.run``.

    :param cdxjFilePath: path of the CDXJ index to replay.
    :param proxy: value stored unchanged on ``app.proxy``.
    """
    replayConfig = ipwbUtils.getIPWBReplayConfig()
    app.proxy = proxy

    if not replayConfig:
        # No stored configuration yet: write the defaults and read them back.
        ipwbUtils.setIPWBReplayConfig(IPWBREPLAY_HOST, IPWBREPLAY_PORT)
        replayConfig = ipwbUtils.getIPWBReplayConfig()

    if not ipwbUtils.isDaemonAlive():
        for notice in ('Sample data not pulled from IPFS.',
                       'Check that the IPFS daemon is running.'):
            print(notice)
    else:
        ipwbUtils.setIPWBReplayIndexPath(cdxjFilePath)
        app.cdxjFilePath = cdxjFilePath

    # Perform checks for CDXJ file existence, TODO: reuse cached contents
    app.cdxjFileContents = getIndexFileContents(cdxjFilePath)

    endpoint = (IPWBREPLAY_HOST, IPWBREPLAY_PORT)
    try:
        print('IPWB replay started on http://{0}:{1}'.format(*endpoint))
        # Listens on every interface, whatever address was announced above.
        app.run(host='0.0.0.0', port=IPWBREPLAY_PORT)
    except gaierror:
        print('Detected no active Internet connection.')
        print('Overriding to use default IP and port configuration.')
        app.run()
    except socketerror:
        print('Address {0}:{1} already in use!'.format(*endpoint))
        sys.exit()
def checkArgs_index(args):
    """Translate parsed CLI arguments into an ``indexer.indexFileAt`` call.

    Exits the process immediately when the IPFS daemon is not alive.

    :param args: parsed arguments; the attributes ``e``, ``c``, ``warcPath``,
        ``compressFirst`` and ``debug`` are read.
    """
    if not util.isDaemonAlive():
        sys.exit()

    # An empty string (rather than None) is passed on when ``-e`` was given.
    encKey = '' if args.e else None
    compressionLevel = 6 if args.c else None  # Magic 6, TA-DA!

    indexer.indexFileAt(
        args.warcPath,
        encKey,
        compressionLevel,
        args.compressFirst,
        debug=args.debug,
    )
def generateDaemonStatusButton():
    """Build the small HTML status page holding the daemon start/stop button.

    The label and button caption depend on whether
    ``ipwbConfig.isDaemonAlive()`` reports a running daemon.

    :return: a ``Response`` wrapping the generated HTML document.
    """
    if ipwbConfig.isDaemonAlive():
        text, buttonText = 'Running', 'Stop'
    else:
        text, buttonText = 'Not Running', 'Start'

    pageParts = [
        '<html id="status{0}" class="status">'.format(buttonText),
        '<head><base href="/webui/" /><link rel="stylesheet" ',
        'type="text/css" href="webui.css" />',
        '<script src="webui.js"></script>',
        '</head><body>',
        '{0}<button>{1}</button>'.format(text, buttonText),
        '<script>assignStatusButtonHandlers()</script></body></html>',
    ]
    return Response(''.join(pageParts))
def show_uri(path, datetime=None):
    """Replay the archived capture of ``path``, optionally at ``datetime``.

    The URI is looked up in the CDXJ replay index; the record's locator
    supplies two IPFS digests (header and payload) that are fetched through
    ``IPFS_API``. The stored HTTP headers are re-emitted, mostly prefixed
    with ``X-Archive-Orig-``, and a script tag is injected before
    ``</html>``.

    :param path: requested URI; an empty value serves the web UI index and
        a few known script names are served locally.
    :param datetime: optional datetime string appended to the index search
        key.
    :return: a ``Response`` (or ``(body, status)`` tuple) for the request:
        503 when the daemon is down, 404 when the hashes cannot be fetched
        in time, 500 on unexpected fetch errors.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    # TODO: Use a better approach to serve static contents
    # instead of using the same logic for every JS file as the SW script
    localScripts = [
        'serviceWorker.js', 'reconstructive.js', 'reconstructive-banner.js'
    ]
    if path in localScripts:
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr, status=503)

    path = getCompleteURI(path)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(path,
                              path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
        print('CDXJ Line: {0}'.format(cdxjLine))
    except Exception:
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)

    if cdxjLine is None:  # Resource not found in archives
        return generateNoMementosInterface(path, datetime)

    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]
    digests = jObj['locator'].split('/')

    class HashNotFoundError(Exception):
        pass

    def handler(signum, frame):
        raise HashNotFoundError()

    try:
        # Give the two IPFS fetches ten seconds in total before giving up.
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(10)

        payload = IPFS_API.cat(digests[-1])
        header = IPFS_API.cat(digests[-2])
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except TypeError:
        print('A type error occurred')
        print(traceback.format_exc())
        print(sys.exc_info()[0])
        # payload/header are unbound here; answer explicitly instead of
        # falling through into a NameError further down.
        return '', 500
    except HashNotFoundError:
        print("Hashes not found")
        return '', 404
    except Exception:
        print('Unknown exception occurred while fetching from ipfs.')
        print(sys.exc_info()[0])
        # A failed fetch must not terminate the whole replay server.
        return '', 500
    finally:
        # Always cancel the pending alarm so it cannot fire later inside
        # unrelated code after an early return or an exception.
        signal.alarm(0)

    if 'encryption_method' in jObj:
        keyString = None
        while keyString is None:
            if 'encryption_key' in jObj:
                keyString = jObj['encryption_key']
            else:
                # One prompt string (the original built a tuple by mistake).
                askForKey = ('Enter a path for file'
                             ' containing decryption key: \n> ')
                keyString = raw_input(askForKey)

        # NOTE(review): only 'xor' is mapped; any other method leaves this
        # as None and the .new() calls below fail - confirm intended set.
        encryptionMethod = None
        if jObj['encryption_method'] == 'xor':
            encryptionMethod = XOR

        pKey = encryptionMethod.new(keyString)
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = encryptionMethod.new(keyString)
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # drop the first line of the stored header block

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            try:
                unchunkedPayload = extractResponseFromChunkedData(payload)
            except Exception:
                continue  # Data may not actually have been chunked
            resp.set_data(unchunkedPayload)

        if k.lower() not in ["content-type", "content-encoding"]:
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    # Add ipwb header for additional SW logic
    newPayload = resp.get_data()
    ipwbjsinject = ('<script src="/webui/webui.js"></script> '
                    '<script>injectIPWBJS()</script>')
    newPayload = newPayload.replace('</html>', ipwbjsinject + '</html>')
    resp.set_data(newPayload)

    resp.headers['Memento-Datetime'] = ipwbConfig.datetimeToRFC1123(datetime)

    # Get TimeMap for Link response header
    respWithLinkHeader = getLinkHeaderAbbreviatedTimeMap(path, datetime)
    resp.headers['Link'] = respWithLinkHeader.replace('\n', ' ')

    return resp
def show_uri(path):
    """Replay the archived capture of ``path`` from IPFS.

    The URI is looked up in the CDXJ replay index; the record's locator
    supplies two IPFS digests (header and payload) that are fetched through
    ``IPFS_API``. Stored HTTP headers other than ``Content-Type`` are
    re-emitted with an ``X-Archive-Orig-`` prefix.

    :param path: requested URI; an empty value serves the web UI index.
    :return: a ``Response`` for the request (404 when the index has no
        entry, 500 when fetching from IPFS fails unexpectedly).
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr)

    cdxjLine = ''
    try:
        s = surt.surt(path, path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()
        cdxjLine = getCDXJLine_binarySearch(s, indexPath)
    except Exception:
        # Single-argument print(...) behaves the same as a statement on
        # Python 2 and as a function on Python 3.
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)

    if cdxjLine is None:  # Resource not found in archives
        return Response(status=404)

    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    digests = jObj['locator'].split('/')

    try:
        payload = IPFS_API.cat(digests[-1], timeout=1)
        header = IPFS_API.cat(digests[-2])
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except Exception:
        print(sys.exc_info()[0])
        print("general error")
        # A failed fetch must not terminate the whole replay server.
        return Response(status=500)

    if 'encryption_method' in jObj:
        pKey = XOR.new(jObj['encryption_key'])
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = XOR.new(jObj['encryption_key'])
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # drop the first line of the stored header block

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)
        if k.lower() != "content-type":
            k = "X-Archive-Orig-" + k
        resp.headers[k] = v

    return resp
def show_uri(path, datetime=None):
    """Replay the archived capture of ``path``, optionally at ``datetime``.

    The URI is looked up in the CDXJ replay index; when nothing matches, a
    404 page listing the captures returned by ``getCDXJLinesWithURIR`` is
    produced. Otherwise the record's locator supplies two IPFS digests
    (header and payload) that are fetched through ``IPFS_API`` and replayed
    with the stored headers, mostly prefixed with ``X-Archive-Orig-``.

    :param path: requested URI; an empty value serves the web UI index and
        ``serviceWorker.js`` is served locally.
    :param datetime: optional datetime string appended to the index search
        key.
    :return: a ``Response`` for the request (404 when the index has no
        entry, 500 when fetching from IPFS fails unexpectedly).
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    if path == 'serviceWorker.js':
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr)

    cdxjLine = ''
    try:
        surtedURI = surt.surt(  # Good ol' pep8 line length
            path, path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
    except Exception:
        # Single-argument print(...) behaves the same as a statement on
        # Python 2 and as a function on Python 3.
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)

    if cdxjLine is None:  # Resource not found in archives
        # NOTE(review): path and datetime come from the request and are
        # placed into HTML unescaped - confirm whether escaping is needed.
        msg = '<h1>ERROR 404</h1>'
        msg += 'No capture found for {0} at {1}.'.format(path, datetime)
        linesWithSameURIR = getCDXJLinesWithURIR(path)

        if linesWithSameURIR:
            msg += '<p>{0} capture(s) available:</p><ul>'.format(
                len(linesWithSameURIR))
            for line in linesWithSameURIR:
                fields = line.split(' ', 2)
                msg += ('<li><a href="/{1}/{0}">{0} at {1}</a></li>'.format(
                    unsurt(fields[0]), fields[1]))
            msg += '</ul>'
        return Response(msg, status=404)

    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]
    digests = jObj['locator'].split('/')

    try:
        payload = IPFS_API.cat(digests[-1], timeout=1)
        header = IPFS_API.cat(digests[-2])
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except Exception:
        print(sys.exc_info()[0])
        print("general error")
        # A failed fetch must not terminate the whole replay server.
        return Response(status=500)

    if 'encryption_method' in jObj:
        pKey = XOR.new(jObj['encryption_key'])
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = XOR.new(jObj['encryption_key'])
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # drop the first line of the stored header block

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            resp.set_data(extractResponseFromChunkedData(payload))
        if k.lower() != "content-type":
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    resp.headers['Memento-Datetime'] = ipwbConfig.datetimeToRFC1123(datetime)

    return resp
def show_uri(path, datetime=None):
    """Replay the archived capture of ``path``, optionally at ``datetime``.

    The URI is looked up in the CDXJ replay index; the record's locator
    supplies two IPFS digests (header and payload) that are fetched through
    ``IPFS_API``. The stored HTTP headers are re-emitted, mostly prefixed
    with ``X-Archive-Orig-``, a script tag is injected before ``</html>``
    and redirect ``Location`` headers are rewritten to stay inside the
    archive.

    :param path: requested URI; an empty value serves the web UI index and
        a few known script names are served locally.
    :param datetime: optional datetime string appended to the index search
        key.
    :return: a ``Response`` for the request. Error paths either return a
        ``Response`` or call ``abort`` with 404, 500 or 503.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    # TODO: Use a better approach to serve static contents
    # instead of using the same logic for every JS file as the SW script
    localScripts = [
        'serviceWorker.js', 'reconstructive.js', 'reconstructive-banner.js'
    ]
    if path in localScripts:
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_HOST, IPFSAPI_PORT)
    if not ipwbUtils.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr, status=503)

    path = getCompleteURI(path)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(path,
                              path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbUtils.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
        print('CDXJ Line: {0}'.format(cdxjLine))
    except Exception:
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_HOST, IPWBREPLAY_PORT)
        return Response(respString)

    if cdxjLine is None:  # Resource not found in archives
        return generateNoMementosInterface(path, datetime)

    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]
    digests = jObj['locator'].split('/')

    class HashNotFoundError(Exception):
        pass

    def handler(signum, frame):
        raise HashNotFoundError()

    payload = None
    header = None
    # Remembers that the stored header could not be fetched, so the
    # response can be marked as carrying generated headers.
    headerFabricated = False
    try:
        if os.name != 'nt':  # Bug #310
            # Give the two IPFS fetches ten seconds in total.
            signal.signal(signal.SIGALRM, handler)
            signal.alarm(10)

        payload = IPFS_API.cat(digests[-1])
        header = IPFS_API.cat(digests[-2])
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_HOST, IPWBREPLAY_PORT)
        return Response(respString)
    except TypeError as e:
        print('A type error occurred')
        print(e)
        abort(500)
    except HTTPError as e:
        print("Fetching from the IPFS failed")
        print(e)
        abort(503)
    except HashNotFoundError:
        if payload is None:
            print("Hashes not found:\n\t{0}\n\t{1}".format(
                digests[-1], digests[-2]))
            abort(404)
        else:  # payload found but not header, fabricate header
            print("HTTP header not found, fabricating for resp replay")
            header = ''
            headerFabricated = True
    except Exception as e:
        print('Unknown exception occurred while fetching from ipfs.')
        print(e)
        abort(500)
    finally:
        # Always cancel the pending alarm so it cannot fire later inside
        # unrelated code after an early return, abort or exception.
        if os.name != 'nt':  # Bug #310
            signal.alarm(0)

    if 'encryption_method' in jObj:
        keyString = None
        while keyString is None:
            if 'encryption_key' in jObj:
                keyString = jObj['encryption_key']
            else:
                # One prompt string (the original built a tuple by mistake).
                askForKey = ('Enter a path for file'
                             ' containing decryption key: \n> ')
                keyString = raw_input(askForKey)

        paddedEncryptionKey = pad(keyString, AES.block_size)
        key = base64.b64encode(paddedEncryptionKey)

        nonce = base64.b64decode(jObj['encryption_nonce'])
        cipher = AES.new(key, AES.MODE_CTR, nonce=nonce)
        # Header first, then payload, through the same cipher object.
        header = cipher.decrypt(base64.b64decode(header))
        payload = cipher.decrypt(base64.b64decode(payload))

    hLines = header.split('\n')
    hLines.pop(0)  # drop the first line of the stored header block

    status = 200
    if 'status_code' in jObj:
        status = jObj['status_code']

    resp = Response(payload, status=status)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            try:
                unchunkedPayload = extractResponseFromChunkedData(payload)
            except Exception:
                continue  # Data may not actually have been chunked
            resp.set_data(unchunkedPayload)

        if k.lower() not in ["content-type", "content-encoding", "location"]:
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    # Add ipwb header for additional SW logic
    newPayload = resp.get_data()
    ipwbjsinject = ('<script src="/webui/webui.js"></script> '
                    '<script>injectIPWBJS()</script>')
    newPayload = newPayload.replace('</html>', ipwbjsinject + '</html>')
    resp.set_data(newPayload)

    resp.headers['Memento-Datetime'] = ipwbUtils.digits14ToRFC1123(datetime)

    if headerFabricated:
        resp.headers['X-Headers-Generated-By'] = 'InterPlanetary Wayback'

    # Get TimeMap for Link response header
    # respWithLinkHeader = getLinkHeaderAbbreviatedTimeMap(path, datetime)
    # resp.headers['Link'] = respWithLinkHeader.replace('\n', ' ')

    # status is the int 200 by default, or whatever the index record holds;
    # compare on its string form so both cases work.
    if str(status).startswith('3') and isUri(resp.headers.get('Location')):
        # Bad assumption that the URI-M will contain \d14 but works for now.
        urimPrefixMatch = re.search(r'/\d{14}/', request.url)
        if urimPrefixMatch is not None:
            uriBeforeURIR = request.url[:urimPrefixMatch.end()]
            newURIM = uriBeforeURIR + resp.headers['Location']
            resp.headers['Location'] = newURIM

    return resp