Ejemplo n.º 1
0
def start(cdxjFilePath=INDEX_FILE, proxy=None):
    """Configure the replay application and run its web server.

    Args:
        cdxjFilePath: Path of the CDXJ index to replay (defaults to
            INDEX_FILE).
        proxy: Value stored on ``app.proxy``; not otherwise used here.

    Exits the process with status 1 when the server cannot bind its
    socket, so that callers and shells can detect the failure.
    """
    hostPort = ipwbConfig.getIPWBReplayConfig()
    app.proxy = proxy

    if not hostPort:
        # Nothing stored yet: persist the default replay host and port
        ipwbConfig.setIPWBReplayConfig(IPWBREPLAY_IP, IPWBREPLAY_PORT)
        hostPort = ipwbConfig.getIPWBReplayConfig()

    if ipwbConfig.isDaemonAlive():
        if cdxjFilePath == INDEX_FILE:
            # Default index in use; firstRun() presumably fetches the
            # sample data (see the message below) -- TODO confirm
            ipwbConfig.firstRun()
        ipwbConfig.setIPWBReplayIndexPath(cdxjFilePath)
        app.cdxjFilePath = cdxjFilePath
    else:
        print('Sample data not pulled from IPFS.')
        print('Check that the IPFS daemon is running.')

    try:
        print('IPWB replay started on http://{0}:{1}'.format(
            IPWBREPLAY_IP, IPWBREPLAY_PORT))
        app.run(host='0.0.0.0', port=IPWBREPLAY_PORT)
    except gaierror:
        print('Detected no active Internet connection.')
        print('Overriding to use default IP and port configuration.')
        app.run()
    except socketerror:
        print('Address {0}:{1} already in use!'.format(IPWBREPLAY_IP,
                                                       IPWBREPLAY_PORT))
        # A failed start-up must not report success (exit status 0)
        sys.exit(1)
Ejemplo n.º 2
0
def showAdmin():
    """Render ``admin.html`` with daemon status and index placeholders.

    The URI-M/URI-R counts and date range are placeholder strings until
    the real values are calculated.
    """
    daemonEndpoint = '{0}:{1}'.format(IPFSAPI_HOST, IPFSAPI_PORT)
    daemonStatus = dict(
        ipwbVersion=ipwbVersion,
        ipfsEndpoint=daemonEndpoint,
        ipfsAlive=ipwbUtils.isDaemonAlive(daemonEndpoint))

    # TODO: Calculate actual URI-R/M counts
    replayIndex = dict(
        path=ipwbUtils.getIPWBReplayIndexPath(),
        enabled=True,
        urimCount='#URI-M',
        urirCount='#URI-R')

    # TODO: Calculate actual values
    totals = dict(
        urimCount='#URI-M',
        urirCount='#URI-R',
        htmlCount='#HTML',
        earliest='Date1',
        latest='Date2')

    return render_template(
        'admin.html',
        status=daemonStatus,
        indexes=[replayIndex],
        summary=totals)
Ejemplo n.º 3
0
def start(cdxjFilePath, proxy=None):
    """Configure the replay application and run its web server.

    Args:
        cdxjFilePath: Path of the CDXJ index to replay.
        proxy: Value stored on ``app.proxy``; not otherwise used here.

    Exits the process with status 1 when the server cannot bind its
    socket, so that callers and shells can detect the failure.
    """
    hostPort = ipwbUtils.getIPWBReplayConfig()
    app.proxy = proxy

    if not hostPort:
        # Nothing stored yet: persist the default replay host and port
        ipwbUtils.setIPWBReplayConfig(IPWBREPLAY_HOST, IPWBREPLAY_PORT)
        hostPort = ipwbUtils.getIPWBReplayConfig()

    if ipwbUtils.isDaemonAlive():
        ipwbUtils.setIPWBReplayIndexPath(cdxjFilePath)
        app.cdxjFilePath = cdxjFilePath
    else:
        print('Sample data not pulled from IPFS.')
        print('Check that the IPFS daemon is running.')

    # Perform checks for CDXJ file existence, TODO: reuse cached contents
    app.cdxjFileContents = getIndexFileContents(cdxjFilePath)

    try:
        print('IPWB replay started on http://{0}:{1}'.format(
            IPWBREPLAY_HOST, IPWBREPLAY_PORT))
        app.run(host='0.0.0.0', port=IPWBREPLAY_PORT)
    except gaierror:
        print('Detected no active Internet connection.')
        print('Overriding to use default IP and port configuration.')
        app.run()
    except socketerror:
        print('Address {0}:{1} already in use!'.format(IPWBREPLAY_HOST,
                                                       IPWBREPLAY_PORT))
        # A failed start-up must not report success (exit status 0)
        sys.exit(1)
Ejemplo n.º 4
0
def checkArgs_index(args):
    """Run the indexer using the parsed command-line arguments.

    Args:
        args: Parsed arguments; reads ``e``, ``c``, ``warcPath``,
            ``compressFirst`` and ``debug``.

    Exits the process with status 1 when the daemon check fails, so the
    failure is visible to the calling shell.
    """
    if not util.isDaemonAlive():
        sys.exit(1)

    # An empty string (rather than None) is passed as the key when -e is set
    encKey = '' if args.e else None
    compressionLevel = 6 if args.c else None  # Magic 6, TA-DA!

    indexer.indexFileAt(args.warcPath,
                        encKey,
                        compressionLevel,
                        args.compressFirst,
                        debug=args.debug)
Ejemplo n.º 5
0
def generateDaemonStatusButton():
    """Return an HTML page showing the daemon state and a toggle button.

    The button offers the opposite action of the current state: "Stop"
    when the daemon is alive, "Start" otherwise.
    """
    if ipwbConfig.isDaemonAlive():
        stateLabel, actionLabel = 'Running', 'Stop'
    else:
        stateLabel, actionLabel = 'Not Running', 'Start'

    pageParts = [
        '<html id="status{0}" class="status">'.format(actionLabel),
        '<head><base href="/webui/" /><link rel="stylesheet" ',
        'type="text/css" href="webui.css" />',
        '<script src="webui.js"></script>',
        '</head><body>',
        '{0}<button>{1}</button>'.format(stateLabel, actionLabel),
        '<script>assignStatusButtonHandlers()</script></body></html>',
    ]
    return Response(''.join(pageParts))
Ejemplo n.º 6
0
def show_uri(path, datetime=None):
    """Replay the archived capture of ``path`` by fetching it from IPFS.

    Args:
        path: Requested URI; an empty value serves the web UI index and
            a few known script names are served locally.
        datetime: Optional datetime string appended to the SURT key when
            searching the index.

    Returns:
        A ``Response`` with the archived payload and rewritten headers,
        an error ``Response``, or a ``('', status)`` tuple when fetching
        from IPFS fails.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    # TODO: Use a better approach to serve static contents
    # instead of using the same logic for every JS file as the SW script
    localScripts = [
        'serviceWorker.js', 'reconstructive.js', 'reconstructive-banner.js'
    ]
    if path in localScripts:
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr, status=503)

    path = getCompleteURI(path)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(path,
                              path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
        print('CDXJ Line: {0}'.format(cdxjLine))

    except Exception:
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    if cdxjLine is None:  # Resource not found in archives
        return generateNoMementosInterface(path, datetime)

    # CDXJ line layout: "<surt> <datetime> <json>"
    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]

    # The last two locator segments are the header and payload digests
    digests = jObj['locator'].split('/')

    class HashNotFoundError(Exception):
        pass

    def handler(signum, frame):
        raise HashNotFoundError()

    try:
        # Give the IPFS lookups ten seconds before giving up
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(10)
        try:
            payload = IPFS_API.cat(digests[-1])
            header = IPFS_API.cat(digests[-2])
        finally:
            # Always cancel the alarm, otherwise it would fire later in
            # unrelated code when one of the lookups raised
            signal.alarm(0)

    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except TypeError:
        print('A type error occurred')
        print(traceback.format_exc())
        print(sys.exc_info()[0])
        # payload/header are unavailable, so replay cannot continue
        return '', 500
    except HashNotFoundError:
        print("Hashes not found")
        return '', 404
    except Exception:
        print('Unknown exception occurred while fetching from ipfs.')
        print(sys.exc_info()[0])
        # Fail this request only instead of exiting the whole server
        return '', 500

    if 'encryption_method' in jObj:
        keyString = None
        while keyString is None:
            if 'encryption_key' in jObj:
                keyString = jObj['encryption_key']
            else:
                # Adjacent literals form one prompt string (a comma here
                # would create a tuple)
                askForKey = ('Enter a path for file'
                             ' containing decryption key: \n> ')
                keyString = raw_input(askForKey)

        encryptionMethod = None
        if jObj['encryption_method'] == 'xor':
            encryptionMethod = XOR

        pKey = encryptionMethod.new(keyString)
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = encryptionMethod.new(keyString)
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # Drop the first header line; only "key: value" lines follow

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            try:
                unchunkedPayload = extractResponseFromChunkedData(payload)
            except Exception:
                continue  # Data may have no actually been chunked
            resp.set_data(unchunkedPayload)

        # Only these headers are replayed as-is; others are preserved
        # under an archival prefix
        if k.lower() not in ["content-type", "content-encoding"]:
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    # Add ipwb header for additional SW logic
    newPayload = resp.get_data()
    ipwbjsinject = """<script src="/webui/webui.js"></script>
                      <script>injectIPWBJS()</script>"""
    newPayload = newPayload.replace('</html>', ipwbjsinject + '</html>')
    resp.set_data(newPayload)

    resp.headers['Memento-Datetime'] = ipwbConfig.datetimeToRFC1123(datetime)

    # Get TimeMap for Link response header
    respWithLinkHeader = getLinkHeaderAbbreviatedTimeMap(path, datetime)
    resp.headers['Link'] = respWithLinkHeader.replace('\n', ' ')

    return resp
Ejemplo n.º 7
0
def show_uri(path):
    """Replay the archived capture of ``path`` by fetching it from IPFS.

    Args:
        path: Requested URI; an empty value serves the web UI index.

    Returns:
        A ``Response`` carrying the archived payload with the original
        headers (all but Content-Type prefixed with ``X-Archive-Orig-``),
        or an error ``Response`` when the lookup or fetch fails.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr)

    # Look up the index (CDXJ) entry for the requested URI
    cdxjLine = ''

    try:
        s = surt.surt(path, path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()
        cdxjLine = getCDXJLine_binarySearch(s, indexPath)
    except Exception:
        # Single-argument print(...) behaves the same on Python 2 and 3
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    if cdxjLine is None:  # Resource not found in archives
        return Response(status=404)

    # CDXJ line layout: "<surt> <datetime> <json>"
    cdxjParts = cdxjLine.split(" ", 2)

    jObj = json.loads(cdxjParts[2])

    # The last two locator segments are the header and payload digests
    digests = jObj['locator'].split('/')

    try:
        payload = IPFS_API.cat(digests[-1], timeout=1)
        header = IPFS_API.cat(digests[-2])
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except Exception:
        print(sys.exc_info()[0])
        print("general error")
        # Fail this request only instead of exiting the whole server
        return Response(status=500)

    if 'encryption_method' in jObj:
        pKey = XOR.new(jObj['encryption_key'])
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = XOR.new(jObj['encryption_key'])
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # Drop the first header line; only "key: value" lines follow

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)
        if k.lower() != "content-type":
            k = "X-Archive-Orig-" + k
        resp.headers[k] = v

    return resp
Ejemplo n.º 8
0
def show_uri(path, datetime=None):
    """Replay the archived capture of ``path`` by fetching it from IPFS.

    Args:
        path: Requested URI; an empty value serves the web UI index and
            ``serviceWorker.js`` is served locally.
        datetime: Optional datetime string appended to the SURT key when
            searching the index.

    Returns:
        A ``Response`` carrying the archived payload and headers, a 404
        page listing other captures of the same URI, or an error
        ``Response`` when the lookup or fetch fails.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    if path == 'serviceWorker.js':
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(  # Good ol' pep8 line length
            path,
            path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
    except Exception:
        # Single-argument print(...) behaves the same on Python 2 and 3
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    if cdxjLine is None:  # Resource not found in archives
        # NOTE(review): path and datetime come from the request and are
        # placed into HTML unescaped -- consider escaping them
        msg = '<h1>ERROR 404</h1>'
        msg += 'No capture found for {0} at {1}.'.format(path, datetime)
        linesWithSameURIR = getCDXJLinesWithURIR(path)

        if linesWithSameURIR:
            msg += '<p>{0} capture(s) available:</p><ul>'.format(
                len(linesWithSameURIR))
            for line in linesWithSameURIR:
                fields = line.split(' ', 2)
                msg += ('<li><a href="/{1}/{0}">{0} at {1}</a></li>'.format(
                    unsurt(fields[0]), fields[1]))
            msg += '</ul>'
        return Response(msg, status=404)

    # CDXJ line layout: "<surt> <datetime> <json>"
    cdxjParts = cdxjLine.split(" ", 2)

    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]

    # The last two locator segments are the header and payload digests
    digests = jObj['locator'].split('/')

    try:
        payload = IPFS_API.cat(digests[-1], timeout=1)
        header = IPFS_API.cat(digests[-2])
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except Exception:
        print(sys.exc_info()[0])
        print("general error")
        # Fail this request only instead of exiting the whole server
        return Response(status=500)

    if 'encryption_method' in jObj:
        pKey = XOR.new(jObj['encryption_key'])
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = XOR.new(jObj['encryption_key'])
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # Drop the first header line; only "key: value" lines follow

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            resp.set_data(extractResponseFromChunkedData(payload))
        if k.lower() != "content-type":
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    resp.headers['Memento-Datetime'] = ipwbConfig.datetimeToRFC1123(datetime)

    return resp
Ejemplo n.º 9
0
def show_uri(path, datetime=None):
    """Replay the archived capture of ``path`` by fetching it from IPFS.

    Args:
        path: Requested URI; an empty value serves the web UI index and
            a few known script names are served locally.
        datetime: Optional datetime string appended to the SURT key when
            searching the index.

    Returns:
        A ``Response`` with the archived payload, status and rewritten
        headers, or an error ``Response``. Fetch failures abort the
        request with 404, 500 or 503.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    # TODO: Use a better approach to serve static contents
    # instead of using the same logic for every JS file as the SW script
    localScripts = [
        'serviceWorker.js', 'reconstructive.js', 'reconstructive-banner.js'
    ]
    if path in localScripts:
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_HOST, IPFSAPI_PORT)
    if not ipwbUtils.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr, status=503)

    path = getCompleteURI(path)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(path,
                              path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbUtils.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
        print('CDXJ Line: {0}'.format(cdxjLine))

    except Exception:
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_HOST, IPWBREPLAY_PORT)
        return Response(respString)
    if cdxjLine is None:  # Resource not found in archives
        return generateNoMementosInterface(path, datetime)

    # CDXJ line layout: "<surt> <datetime> <json>"
    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]

    # The last two locator segments are the header and payload digests
    digests = jObj['locator'].split('/')

    class HashNotFoundError(Exception):
        pass

    def handler(signum, frame):
        raise HashNotFoundError()

    useAlarm = os.name != 'nt'  # Bug #310

    payload = None
    header = None
    headerFabricated = False
    try:
        if useAlarm:
            # Give the IPFS lookups ten seconds before giving up
            signal.signal(signal.SIGALRM, handler)
            signal.alarm(10)
        try:
            payload = IPFS_API.cat(digests[-1])
            header = IPFS_API.cat(digests[-2])
        finally:
            # Always cancel the alarm, otherwise it would fire later in
            # unrelated code when one of the lookups raised
            if useAlarm:
                signal.alarm(0)

    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_HOST, IPWBREPLAY_PORT)
        return Response(respString)
    except TypeError as e:
        print('A type error occurred')
        print(e)
        abort(500)
    except HTTPError as e:
        print("Fetching from the IPFS failed")
        print(e)
        abort(503)
    except HashNotFoundError:
        if payload is None:
            print("Hashes not found:\n\t{0}\n\t{1}".format(
                digests[-1], digests[-2]))
            abort(404)
        else:  # payload found but not header, fabricate header
            print("HTTP header not found, fabricating for resp replay")
            header = ''
            headerFabricated = True
    except Exception as e:
        print('Unknown exception occurred while fetching from ipfs.')
        print(e)
        abort(500)

    if 'encryption_method' in jObj:
        keyString = None
        while keyString is None:
            if 'encryption_key' in jObj:
                keyString = jObj['encryption_key']
            else:
                # Adjacent literals form one prompt string (a comma here
                # would create a tuple)
                askForKey = ('Enter a path for file'
                             ' containing decryption key: \n> ')
                keyString = raw_input(askForKey)

        paddedEncryptionKey = pad(keyString, AES.block_size)
        key = base64.b64encode(paddedEncryptionKey)

        nonce = base64.b64decode(jObj['encryption_nonce'])
        cipher = AES.new(key, AES.MODE_CTR, nonce=nonce)
        # Header is decrypted before payload with the same cipher object;
        # the order is kept as-is
        header = cipher.decrypt(base64.b64decode(header))
        payload = cipher.decrypt(base64.b64decode(payload))

    hLines = header.split('\n')
    hLines.pop(0)  # Drop the first header line; only "key: value" lines follow

    status = 200
    if 'status_code' in jObj:
        status = jObj['status_code']

    resp = Response(payload, status=status)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            try:
                unchunkedPayload = extractResponseFromChunkedData(payload)
            except Exception:
                continue  # Data may have no actually been chunked
            resp.set_data(unchunkedPayload)

        # Only these headers are replayed as-is; others are preserved
        # under an archival prefix
        if k.lower() not in ["content-type", "content-encoding", "location"]:
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    # Add ipwb header for additional SW logic
    newPayload = resp.get_data()
    ipwbjsinject = """<script src="/webui/webui.js"></script>
                      <script>injectIPWBJS()</script>"""
    newPayload = newPayload.replace('</html>', ipwbjsinject + '</html>')
    resp.set_data(newPayload)

    resp.headers['Memento-Datetime'] = ipwbUtils.digits14ToRFC1123(datetime)

    # header is '' (never None) by this point, so rely on the explicit flag
    if headerFabricated:
        resp.headers['X-Headers-Generated-By'] = 'InterPlanetary Wayback'

    # Get TimeMap for Link response header
    # respWithLinkHeader = getLinkHeaderAbbreviatedTimeMap(path, datetime)
    # resp.headers['Link'] = respWithLinkHeader.replace('\n', ' ')

    # status may be the int default or a value read from the index, so
    # compare on its string form
    if str(status).startswith('3') and isUri(resp.headers.get('Location')):
        # Bad assumption that the URI-M will contain \d14 but works for now.
        uriMPrefix = re.search(r'/\d{14}/', request.url)
        if uriMPrefix is not None:
            uriBeforeURIR = request.url[:uriMPrefix.end()]
            newURIM = uriBeforeURIR + resp.headers['Location']
            resp.headers['Location'] = newURIM

    return resp