Example #1
0
def showAdmin():
    """Render the admin page with daemon status and index information.

    Only the status section carries live values; the per-index and summary
    counts are still placeholder strings.
    """
    ipfsEndpoint = '{0}:{1}'.format(IPFSAPI_HOST, IPFSAPI_PORT)

    status = dict(
        ipwbVersion=ipwbVersion,
        ipfsEndpoint=ipfsEndpoint,
        ipfsAlive=ipwbUtils.isDaemonAlive(ipfsEndpoint),
    )

    # TODO: Calculate actual URI-R/M counts
    replayIndex = dict(
        path=ipwbUtils.getIPWBReplayIndexPath(),
        enabled=True,
        urimCount='#URI-M',
        urirCount='#URI-R',
    )

    # TODO: Calculate actual values
    summary = dict(
        urimCount='#URI-M',
        urirCount='#URI-R',
        htmlCount='#HTML',
        earliest='Date1',
        latest='Date2',
    )

    return render_template(
        'admin.html', status=status, indexes=[replayIndex], summary=summary)
Example #2
0
def showWebUI(path):
    """Serve a packaged web UI resource with a MIME type based on extension.

    When the requested path contains ``index.html`` the MEMCOUNT,
    ``var uris = []`` and INDEXSRC placeholders are filled in from the replay
    index before the content is returned.
    """
    resourceName = '/'.join(('webui', path)).replace('ipwb.replay', 'ipwb')
    content = pkg_resources.resource_string(__name__, resourceName)

    if 'index.html' in path:
        iFile = ipwbConfig.getIPWBReplayIndexPath()
        if iFile in (None, ''):
            # No index configured, fall back to the packaged one
            iFile = pkg_resources.resource_filename(__name__, INDEX_FILE)

        if not os.path.isabs(iFile):
            # Prefer the packaged copy of a relative path when it exists
            packagedPath = pkg_resources.resource_filename(__name__, iFile)
            if os.path.exists(packagedPath):
                iFile = packagedPath

        content = content.replace(
            'MEMCOUNT', str(retrieveMemCount(iFile))
        ).replace(
            'var uris = []',
            'var uris = {0}'.format(getURIsAndDatetimesInCDXJ(iFile))
        ).replace('INDEXSRC', iFile)

    # Anything that is not JavaScript or CSS is served as HTML
    mimeTypesByExtension = {
        '.js': 'application/javascript',
        '.css': 'text/css',
    }
    extension = os.path.splitext(path)[1]
    mimeType = mimeTypesByExtension.get(extension, 'text/html')

    resp = Response(content, mimetype=mimeType)
    resp.headers['Service-Worker-Allowed'] = '/'
    return resp
Example #3
0
def resolveMemento(urir, datetime):
    """Request a URI-R at a supplied datetime from the CDXJ.

    Args:
        urir: The requested URI-R; it is completed with getCompleteURI first.
        datetime: The datetime the closest capture is looked up for.

    Returns:
        A ``(newDatetime, linkHeader, uri)`` tuple for the closest capture,
        or a 404 ``Response`` when no capture was found.
    """
    urir = getCompleteURI(urir)

    if ipwbUtils.isLocalHosty(urir):
        # Keep only what follows the fourth '/' of the local replay URI
        urir = urir.split('/', 4)[4]
    indexPath = ipwbUtils.getIPWBReplayIndexPath()

    print('Getting CDXJ Lines with the URI-R {0} from {1}'.format(
        urir, indexPath))
    # The SURT form is derived inside getCDXJLinesWithURIR, so it is not
    # computed here as well.
    cdxjLinesWithURIR = getCDXJLinesWithURIR(urir, indexPath)

    closestLine = getCDXJLineClosestTo(datetime, cdxjLinesWithURIR)

    if closestLine is None:
        msg = '<h1>ERROR 404</h1>'
        msg += 'No capture found for {0} at {1}.'.format(urir, datetime)

        return Response(msg, status=404)

    # First field is the SURT key, second the capture datetime
    fields = closestLine.split(' ')
    uri = unsurt(fields[0])
    newDatetime = fields[1]

    linkHeader = getLinkHeaderAbbreviatedTimeMap(urir, newDatetime)

    return (newDatetime, linkHeader, uri)
Example #4
0
def showTimeMap(urir, format):
    """Return the TimeMap of a URI-R in the 'link' or 'cdxj' format.

    Any other format value yields an empty body with an empty Content-Type.
    """
    urir = getCompleteURI(urir)
    surtedURIR = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
    matchingLines = getCDXJLinesWithURIR(
        urir, ipwbUtils.getIPWBReplayIndexPath())

    replayConfig = ipwbUtils.getIPWBReplayConfig()
    tgURI = 'http://{0}:{1}/timegate/{2}'.format(
        replayConfig[0], replayConfig[1], urir)

    # Pick the generator and media type for the requested serialization
    if format == 'link':
        buildTimeMap = generateLinkTimeMapFromCDXJLines
        tmContentType = 'application/link-format'
    elif format == 'cdxj':
        buildTimeMap = generateCDXJTimeMapFromCDXJLines
        tmContentType = 'application/cdxj+ors'
    else:
        buildTimeMap = None
        tmContentType = ''

    if buildTimeMap is None:
        tm = ''
    else:
        tm = buildTimeMap(matchingLines, surtedURIR, request.url, tgURI)

    resp = Response(tm)
    resp.headers['Content-Type'] = tmContentType
    return resp
Example #5
0
def getCDXJLinesWithURIR(urir, indexPath):
    """Collect every CDXJ record whose key matches the SURT of a URI-R.

    The record found by the binary search comes first, followed by matching
    records before it (nearest first) and then matching records after it.
    An empty list is returned when the binary search finds nothing.
    """
    if not indexPath:
        indexPath = ipwbConfig.getIPWBReplayIndexPath()
    indexPath = getIndexFileFullPath(indexPath)

    print('Getting CDXJ Lines with {0} in {1}'.format(urir, indexPath))
    surtedURIR = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)

    pivot = getCDXJLine_binarySearch(surtedURIR, indexPath, True, True)
    if pivot is None:
        return []

    with open(indexPath, 'r') as indexFile:
        allLines = indexFile.read().split('\n')

    def sharesSURT(line):
        return line.split(' ')[0] == surtedURIR

    matches = [allLines[pivot]]  # record located via binary search
    # Walk backwards from the pivot, then forwards from it
    matches.extend(
        allLines[i] for i in range(pivot - 1, -1, -1)
        if sharesSURT(allLines[i]))
    matches.extend(
        allLines[i] for i in range(pivot + 1, len(allLines))
        if sharesSURT(allLines[i]))
    return matches
Example #6
0
def showMementosForURIRs(urir):
    """List the captures available for a URI-R.

    Args:
        urir: The requested URI-R; it is completed with getCompleteURI first.

    Returns:
        A 302 redirect to the capture when exactly one exists, otherwise a
        ``Response`` holding an HTML list of all captures (empty body when
        there are none).
    """
    urir = getCompleteURI(urir)

    if ipwbConfig.isLocalHosty(urir):
        # Keep only what follows the fourth '/' of the local replay URI
        urir = urir.split('/', 4)[4]
    indexPath = ipwbConfig.getIPWBReplayIndexPath()

    print('Getting CDXJ Lines with the URI-R {0} from {1}'.format(
        urir, indexPath))
    # The SURT form is derived inside getCDXJLinesWithURIR, so it is not
    # computed here as well.
    cdxjLinesWithURIR = getCDXJLinesWithURIR(urir, indexPath)

    if len(cdxjLinesWithURIR) == 1:
        fields = cdxjLinesWithURIR[0].split(' ', 2)
        redirectURI = '/{1}/{0}'.format(unsurt(fields[0]), fields[1])
        return redirect(redirectURI, code=302)

    if not cdxjLinesWithURIR:
        return Response('')

    # Collect the fragments and join once instead of growing a string
    parts = ['<p>{0} capture(s) available:</p><ul>'.format(
        len(cdxjLinesWithURIR))]
    for line in cdxjLinesWithURIR:
        fields = line.split(' ', 2)
        dtrfc1123 = ipwbConfig.datetimeToRFC1123(fields[1])
        parts.append('<li><a href="/{1}/{0}">{0} at {2}</a></li>'.format(
            unsurt(fields[0]), fields[1], dtrfc1123))
    parts.append('</ul>')
    return Response(''.join(parts))
Example #7
0
def getCDXJLinesWithURIR(urir, indexPath=None):
    """Get all CDXJ records corresponding to a URI-R.

    Args:
        urir: The URI-R whose SURT form is matched against the first field
            of each index line.
        indexPath: Path of the CDXJ index. When omitted (None) the configured
            replay index path is looked up at call time; a default evaluated
            in the signature would be frozen when the module is imported.

    Returns:
        A list of matching CDXJ lines: the line found by the binary search
        first, then matches before it (nearest first), then matches after
        it. Empty when the binary search finds nothing.
    """
    if indexPath is None:
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

    s = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)

    cdxjLineIndex = getCDXJLine_binarySearch(s, indexPath, True, True)
    if cdxjLineIndex is None:
        return []

    with open(indexPath, 'r') as f:
        cdxjLines = f.read().split('\n')

    cdxjLinesWithURIR = [cdxjLines[cdxjLineIndex]]  # via binsearch

    # Get lines before pivot that match surt
    for sI in range(cdxjLineIndex - 1, -1, -1):
        if cdxjLines[sI].split(' ')[0] == s:
            cdxjLinesWithURIR.append(cdxjLines[sI])
    # Get lines after pivot that match surt
    for sI in range(cdxjLineIndex + 1, len(cdxjLines)):
        if cdxjLines[sI].split(' ')[0] == s:
            cdxjLinesWithURIR.append(cdxjLines[sI])
    return cdxjLinesWithURIR
Example #8
0
def showTimeMap(urir, format):
    """Return the TimeMap generated from the CDXJ records of a URI-R.

    The ``format`` argument is accepted but not consulted here.
    """
    surtedURIR = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
    matchingLines = getCDXJLinesWithURIR(
        urir, ipwbConfig.getIPWBReplayIndexPath())

    return Response(
        generateTimeMapFromCDXJLines(matchingLines, surtedURIR, request.url))
Example #9
0
def getLinkHeaderAbbreviatedTimeMap(urir, pivotDatetime):
    """Build an abbreviated link TimeMap centered on one memento.

    The full link TimeMap is generated, its "self" relation is relabelled as
    "timemap", the neighbours of the line containing ``pivotDatetime`` are
    tagged as next/prev, and every other memento line that is not tagged
    first, last, next or prev is blanked out.
    """
    surtedURIR = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
    matchingLines = getCDXJLinesWithURIR(
        urir, ipwbConfig.getIPWBReplayIndexPath())
    replayConfig = ipwbConfig.getIPWBReplayConfig()

    # The host is pinned to localhost; only the configured port is used
    tmURI = 'http://{0}:{1}/timemap/link/{2}'.format(
        'localhost', replayConfig[1], urir)
    tm = generateLinkTimeMapFromCDXJLines(matchingLines, surtedURIR, tmURI)

    # Fix base TM relation when viewing abbrev version in Link resp
    tm = tm.replace('rel="self"', 'rel="timemap"')

    if 'rel="first last memento"' in tm:
        return tm  # Only one memento in TimeMap

    mementoRel = re.compile('rel=.*memento"')
    keptRel = re.compile('rel=.*(next|prev|first|last)')

    tmLines = tm.split('\n')
    lastIdx = len(tmLines) - 1

    # Tag the neighbours of the pivot memento
    for idx, line in enumerate(tmLines):
        if mementoRel.search(line) is None:
            continue  # Not a memento
        if pivotDatetime not in line:
            continue

        isInterior = 0 < idx < lastIdx
        if isInterior or idx == 0:
            tmLines[idx + 1] = tmLines[idx + 1].replace(
                'memento"', 'next memento"')
        if isInterior or idx == lastIdx:
            tmLines[idx - 1] = tmLines[idx - 1].replace(
                'memento"', 'prev memento"')
        break

    # Blank every memento that is not first, last, prev, next, or the pivot
    for idx, line in enumerate(tmLines):
        if mementoRel.search(line) is None or pivotDatetime in line:
            continue
        if keptRel.search(line) is None:
            tmLines[idx] = ''

    return '\n'.join(tmLines)
Example #10
0
def showTimeMap(urir, format):
    """Return the TimeMap of a URI-R in the requested serialization.

    Args:
        urir: The requested URI-R; it is completed with getCompleteURI first.
        format: 'link' or 'cdxj'. Any other value produces an empty body
            with an empty Content-Type.

    Returns:
        A ``Response`` carrying the TimeMap and its Content-Type header.
    """
    urir = getCompleteURI(urir)
    s = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
    indexPath = ipwbConfig.getIPWBReplayIndexPath()

    cdxjLinesWithURIR = getCDXJLinesWithURIR(urir, indexPath)

    # Initialize both values so that an unrecognized format does not leave
    # `tm` unbound when the response is built below.
    tm = ''
    tmContentType = ''
    if format == 'link':
        tm = generateLinkTimeMapFromCDXJLines(cdxjLinesWithURIR, s,
                                              request.url)
        tmContentType = 'application/link-format'
    elif format == 'cdxj':
        tm = generateCDXJTimeMapFromCDXJLines(cdxjLinesWithURIR, s,
                                              request.url)
        tmContentType = 'application/cdxj+ors'

    resp = Response(tm)
    resp.headers['Content-Type'] = tmContentType

    return resp
Example #11
0
def showMemento(urir, datetime):
    """Replay the capture of a URI-R closest to the supplied datetime.

    Args:
        urir: The requested URI-R.
        datetime: The datetime the closest capture is looked up for.

    Returns:
        The result of show_uri for the closest capture, or a 404
        ``Response`` when no capture was found.
    """
    indexPath = ipwbConfig.getIPWBReplayIndexPath()

    # The SURT form is derived inside getCDXJLinesWithURIR, so it is not
    # computed here as well.
    cdxjLinesWithURIR = getCDXJLinesWithURIR(urir, indexPath)

    print('Resolving request for {0} at {1}'.format(urir, datetime))
    print('Found {0} cdxj entrie(s) for {1}'.format(
        len(cdxjLinesWithURIR), urir))
    print('MEMENTOS:')
    print(cdxjLinesWithURIR)

    closestLine = getCDXJLineClosestTo(datetime, cdxjLinesWithURIR)
    if closestLine is None:
        msg = '<h1>ERROR 404</h1>'
        msg += 'No capture found for {0} at {1}.'.format(urir, datetime)
        return Response(msg, status=404)

    # Logged only after the None check: concatenating None to a string
    # would raise before the 404 could be returned.
    print('best line: ' + closestLine)

    uri = unsurt(closestLine.split(' ')[0])

    return show_uri(uri, datetime)
Example #12
0
def showWebUI(path):
    """Serve a packaged web UI resource.

    When the requested path contains ``index.html`` the MEMCOUNT,
    ``var uris = []`` and INDEXSRC placeholders are filled in from the replay
    index before the content is returned.
    """
    resourceName = '/'.join(('webui', path)).replace('ipwb.replay', 'ipwb')
    content = pkg_resources.resource_string(__name__, resourceName)

    if 'index.html' not in path:
        return Response(content)

    iFile = ipwbConfig.getIPWBReplayIndexPath()
    if iFile in (None, ''):
        # No index configured, fall back to the packaged one
        iFile = pkg_resources.resource_filename(__name__, INDEX_FILE)

    if not os.path.isabs(iFile):
        # Prefer the packaged copy of a relative path when it exists
        packagedPath = pkg_resources.resource_filename(__name__, iFile)
        if os.path.exists(packagedPath):
            iFile = packagedPath

    content = content.replace(
        'MEMCOUNT', str(retrieveMemCount(iFile))
    ).replace(
        'var uris = []',
        'var uris = {0}'.format(getURIsAndDatetimesInCDXJ(iFile))
    ).replace('INDEXSRC', iFile)

    return Response(content)
Example #13
0
def showMemento(urir, datetime):
    """Request a URI-R at a supplied datetime from the CDXJ.

    Args:
        urir: The requested URI-R; it is completed with getCompleteURI first.
        datetime: The datetime the closest capture is looked up for.

    Returns:
        A 404 ``Response`` when no capture exists, a 302 redirect to the
        closest capture when its datetime differs from the requested one,
        otherwise the result of show_uri for that capture.
    """
    urir = getCompleteURI(urir)

    if ipwbConfig.isLocalHosty(urir):
        # Keep only what follows the fourth '/' of the local replay URI
        urir = urir.split('/', 4)[4]
    indexPath = ipwbConfig.getIPWBReplayIndexPath()

    print('Getting CDXJ Lines with the URI-R {0} from {1}'.format(
        urir, indexPath))
    # The SURT form is derived inside getCDXJLinesWithURIR, so it is not
    # computed here as well.
    cdxjLinesWithURIR = getCDXJLinesWithURIR(urir, indexPath)

    closestLine = getCDXJLineClosestTo(datetime, cdxjLinesWithURIR)
    if closestLine is None:
        msg = '<h1>ERROR 404</h1>'
        msg += 'No capture found for {0} at {1}.'.format(urir, datetime)
        return Response(msg, status=404)

    # First field is the SURT key, second the capture datetime
    fields = closestLine.split(' ')
    uri = unsurt(fields[0])
    newDatetime = fields[1]
    if newDatetime != datetime:
        return redirect('/memento/{0}/{1}'.format(newDatetime, urir), code=302)
    return show_uri(uri, newDatetime)
Example #14
0
def show_uri(path, datetime=None):
    """Replay an archived capture of a URI by fetching it from IPFS.

    Args:
        path: The requested URI. An empty value serves the web UI index and
            the known local script names are served via getServiceWorker.
        datetime: Optional datetime string appended to the SURT form of the
            URI when searching the index.

    Returns:
        A ``Response`` with the replayed payload and rewritten headers, or an
        error response: 503 when the IPFS daemon is down, 404 when the
        hashes cannot be fetched in time, 500 on unexpected fetch errors.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    # TODO: Use a better approach to serve static contents
    # instead of using the same logic for every JS file as the SW script
    localScripts = [
        'serviceWorker.js', 'reconstructive.js', 'reconstructive-banner.js'
    ]
    if path in localScripts:
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr, status=503)

    path = getCompleteURI(path)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(path,
                              path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
        print('CDXJ Line: {0}'.format(cdxjLine))

    except Exception:
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    if cdxjLine is None:  # Resource not found in archives
        return generateNoMementosInterface(path, datetime)

    # CDXJ line layout: "<surt> <datetime> <json>"
    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]

    # The last two locator segments are the header and payload digests
    digests = jObj['locator'].split('/')

    class HashNotFoundError(Exception):
        pass

    def handler(signum, frame):
        raise HashNotFoundError()

    try:
        # Give the two IPFS fetches at most 10 seconds in total
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(10)
        try:
            payload = IPFS_API.cat(digests[-1])
            header = IPFS_API.cat(digests[-2])
        finally:
            # Always cancel the alarm so it cannot fire after this request
            signal.alarm(0)

    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except TypeError:
        print('A type error occurred')
        print(traceback.format_exc())
        print(sys.exc_info()[0])
        # payload/header are unbound here, so replay cannot continue
        return Response('A type error occurred while fetching from IPFS.',
                        status=500)
    except HashNotFoundError:
        print("Hashes not found")
        return '', 404
    except Exception:
        print('Unknown exception occurred while fetching from ipfs.')
        print(sys.exc_info()[0])
        # Fail this request only instead of exiting the whole process
        return Response(
            'Unknown exception occurred while fetching from ipfs.',
            status=500)

    if 'encryption_method' in jObj:
        keyString = None
        while keyString is None:
            if 'encryption_key' in jObj:
                keyString = jObj['encryption_key']
            else:
                # Adjacent literals form one prompt string (not a tuple)
                askForKey = ('Enter a path for file'
                             ' containing decryption key: \n> ')
                keyString = raw_input(askForKey)

        encryptionMethod = None
        if jObj['encryption_method'] == 'xor':
            encryptionMethod = XOR

        pKey = encryptionMethod.new(keyString)
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = encryptionMethod.new(keyString)
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # Drop the first header line

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            try:
                unchunkedPayload = extractResponseFromChunkedData(payload)
            except Exception:
                continue  # Data may not have actually been chunked
            resp.set_data(unchunkedPayload)

        # Every other original header is namespaced rather than replayed
        if k.lower() not in ["content-type", "content-encoding"]:
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    # Add ipwb header for additional SW logic
    newPayload = resp.get_data()
    ipwbjsinject = """<script src="/webui/webui.js"></script>
                      <script>injectIPWBJS()</script>"""
    newPayload = newPayload.replace('</html>', ipwbjsinject + '</html>')
    resp.set_data(newPayload)

    resp.headers['Memento-Datetime'] = ipwbConfig.datetimeToRFC1123(datetime)

    # Get TimeMap for Link response header
    respWithLinkHeader = getLinkHeaderAbbreviatedTimeMap(path, datetime)
    resp.headers['Link'] = respWithLinkHeader.replace('\n', ' ')

    return resp
Example #15
0
def show_uri(path):
    """Replay an archived capture of a URI by fetching it from IPFS.

    Args:
        path: The requested URI; an empty value serves the web UI index.

    Returns:
        A ``Response`` with the replayed payload and rewritten headers, or an
        error response: 503 when the IPFS daemon is down, 404 when the URI is
        not in the index, 500 on unexpected fetch errors.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr, status=503)

    # Look up the index record for the SURT form of the requested URI
    cdxjLine = ''

    try:
        s = surt.surt(path, path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()
        cdxjLine = getCDXJLine_binarySearch(s, indexPath)
    except Exception:
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    if cdxjLine is None:  # Resource not found in archives
        return Response(status=404)

    # CDXJ line layout: "<surt> <datetime> <json>"
    cdxjParts = cdxjLine.split(" ", 2)

    jObj = json.loads(cdxjParts[2])

    # The last two locator segments are the header and payload digests
    digests = jObj['locator'].split('/')

    try:
        payload = IPFS_API.cat(digests[-1], timeout=1)
        header = IPFS_API.cat(digests[-2])
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except Exception:
        print(sys.exc_info()[0])
        print("general error")
        # Fail this request only instead of exiting the whole process
        return Response('general error', status=500)

    if 'encryption_method' in jObj:
        pKey = XOR.new(jObj['encryption_key'])
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = XOR.new(jObj['encryption_key'])
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # Drop the first header line

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)
        # Every original header but Content-Type is namespaced
        if k.lower() != "content-type":
            k = "X-Archive-Orig-" + k
        resp.headers[k] = v

    return resp
Example #16
0
def show_uri(path, datetime=None):
    """Replay an archived capture of a URI by fetching it from IPFS.

    Args:
        path: The requested URI. An empty value serves the web UI index and
            'serviceWorker.js' is served via getServiceWorker.
        datetime: Optional datetime string appended to the SURT form of the
            URI when searching the index.

    Returns:
        A ``Response`` with the replayed payload and rewritten headers, or an
        error response: 503 when the IPFS daemon is down, 404 (listing the
        other captures of the URI) when no record matches, 500 on unexpected
        fetch errors.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    if path == 'serviceWorker.js':
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr, status=503)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(
            path,
            path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
    except Exception:
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    if cdxjLine is None:  # Resource not found in archives
        parts = ['<h1>ERROR 404</h1>',
                 'No capture found for {0} at {1}.'.format(path, datetime)]
        linesWithSameURIR = getCDXJLinesWithURIR(path)

        # Offer the other captures of the same URI-R, if any
        if linesWithSameURIR:
            parts.append('<p>{0} capture(s) available:</p><ul>'.format(
                len(linesWithSameURIR)))
            for line in linesWithSameURIR:
                fields = line.split(' ', 2)
                parts.append(
                    '<li><a href="/{1}/{0}">{0} at {1}</a></li>'.format(
                        unsurt(fields[0]), fields[1]))
            parts.append('</ul>')
        return Response(''.join(parts), status=404)

    # CDXJ line layout: "<surt> <datetime> <json>"
    cdxjParts = cdxjLine.split(" ", 2)

    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]

    # The last two locator segments are the header and payload digests
    digests = jObj['locator'].split('/')

    try:
        payload = IPFS_API.cat(digests[-1], timeout=1)
        header = IPFS_API.cat(digests[-2])
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except Exception:
        print(sys.exc_info()[0])
        print("general error")
        # Fail this request only instead of exiting the whole process
        return Response('general error', status=500)

    if 'encryption_method' in jObj:
        pKey = XOR.new(jObj['encryption_key'])
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = XOR.new(jObj['encryption_key'])
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # Drop the first header line

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            resp.set_data(extractResponseFromChunkedData(payload))
        # Every original header but Content-Type is namespaced
        if k.lower() != "content-type":
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    resp.headers['Memento-Datetime'] = ipwbConfig.datetimeToRFC1123(datetime)

    return resp
Example #17
0
def show_uri(path, datetime=None):
    """Replay an archived capture of a URI by fetching it from IPFS.

    Args:
        path: The requested URI. An empty value serves the web UI index and
            the known local script names are served via getServiceWorker.
        datetime: Optional datetime string appended to the SURT form of the
            URI when searching the index.

    Returns:
        A ``Response`` with the replayed payload, the recorded status code
        (200 when the index record has none) and rewritten headers, or a 503
        ``Response`` when the IPFS daemon is down.

    Raises:
        The HTTP errors produced by ``abort``: 404 when the hashes cannot be
        fetched in time, 503 on an ``HTTPError`` from IPFS, 500 on other
        fetch errors.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    # TODO: Use a better approach to serve static contents
    # instead of using the same logic for every JS file as the SW script
    localScripts = [
        'serviceWorker.js', 'reconstructive.js', 'reconstructive-banner.js'
    ]
    if path in localScripts:
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_HOST, IPFSAPI_PORT)
    if not ipwbUtils.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr, status=503)

    path = getCompleteURI(path)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(path,
                              path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbUtils.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
        print('CDXJ Line: {0}'.format(cdxjLine))

    except Exception:
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_HOST, IPWBREPLAY_PORT)
        return Response(respString)
    if cdxjLine is None:  # Resource not found in archives
        return generateNoMementosInterface(path, datetime)

    # CDXJ line layout: "<surt> <datetime> <json>"
    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]

    # The last two locator segments are the header and payload digests
    digests = jObj['locator'].split('/')

    class HashNotFoundError(Exception):
        pass

    def handler(signum, frame):
        raise HashNotFoundError()

    payload = None
    header = None
    # Set when the payload was fetched but its HTTP header was not
    headerFabricated = False
    # The SIGALRM based timeout is skipped on Windows (Bug #310)
    useAlarm = os.name != 'nt'
    try:
        if useAlarm:
            signal.signal(signal.SIGALRM, handler)
            signal.alarm(10)
        try:
            payload = IPFS_API.cat(digests[-1])
            header = IPFS_API.cat(digests[-2])
        finally:
            # Always cancel the alarm so it cannot fire after this request
            if useAlarm:
                signal.alarm(0)

    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_HOST, IPWBREPLAY_PORT)
        return Response(respString)
    except TypeError as e:
        print('A type error occurred')
        print(e)
        abort(500)
    except HTTPError as e:
        print("Fetching from the IPFS failed")
        print(e)
        abort(503)
    except HashNotFoundError:
        if payload is None:
            print("Hashes not found:\n\t{0}\n\t{1}".format(
                digests[-1], digests[-2]))
            abort(404)
        else:  # payload found but not header, fabricate header
            print("HTTP header not found, fabricating for resp replay")
            header = ''
            headerFabricated = True
    except Exception as e:
        print('Unknown exception occurred while fetching from ipfs.')
        print(e)
        abort(500)

    if 'encryption_method' in jObj:
        keyString = None
        while keyString is None:
            if 'encryption_key' in jObj:
                keyString = jObj['encryption_key']
            else:
                # Adjacent literals form one prompt string (not a tuple)
                askForKey = ('Enter a path for file'
                             ' containing decryption key: \n> ')
                keyString = raw_input(askForKey)

        paddedEncryptionKey = pad(keyString, AES.block_size)
        key = base64.b64encode(paddedEncryptionKey)

        nonce = base64.b64decode(jObj['encryption_nonce'])
        cipher = AES.new(key, AES.MODE_CTR, nonce=nonce)
        # The same cipher object decrypts the header first, then the payload
        header = cipher.decrypt(base64.b64decode(header))
        payload = cipher.decrypt(base64.b64decode(payload))

    hLines = header.split('\n')
    hLines.pop(0)  # Drop the first header line

    status = 200
    if 'status_code' in jObj:
        status = jObj['status_code']

    resp = Response(payload, status=status)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            try:
                unchunkedPayload = extractResponseFromChunkedData(payload)
            except Exception:
                continue  # Data may not have actually been chunked
            resp.set_data(unchunkedPayload)

        # Every other original header is namespaced rather than replayed
        if k.lower() not in ["content-type", "content-encoding", "location"]:
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    # Add ipwb header for additional SW logic
    newPayload = resp.get_data()
    ipwbjsinject = """<script src="/webui/webui.js"></script>
                      <script>injectIPWBJS()</script>"""
    newPayload = newPayload.replace('</html>', ipwbjsinject + '</html>')
    resp.set_data(newPayload)

    resp.headers['Memento-Datetime'] = ipwbUtils.digits14ToRFC1123(datetime)

    # `header` is never None here (it was split above), so the fabricated
    # case is tracked with an explicit flag.
    if headerFabricated:
        resp.headers['X-Headers-Generated-By'] = 'InterPlanetary Wayback'

    # Get TimeMap for Link response header
    # respWithLinkHeader = getLinkHeaderAbbreviatedTimeMap(path, datetime)
    # resp.headers['Link'] = respWithLinkHeader.replace('\n', ' ')

    # `status` may be the int default or the value stored in the index
    # record, so compare its string form.
    if str(status).startswith('3') and isUri(resp.headers.get('Location')):
        # Bad assumption that the URI-M will contain \d14 but works for now.
        urimPrefix = re.search(r'/\d{14}/', request.url)
        if urimPrefix is not None:
            uriBeforeURIR = request.url[:urimPrefix.end()]
            resp.headers['Location'] = (
                uriBeforeURIR + resp.headers['Location'])

    return resp