Example #1
0
def showMementosForURIRs(urir):
    """Redirect to the only capture of a URI-R, or list all of its captures.

    The URI-R is completed with getCompleteURI() and, when
    ipwbConfig.isLocalHosty() reports it as local, reduced to whatever
    follows its fourth '/'. The replay index is then queried for CDXJ
    lines matching that URI-R.

    Returns:
        A 302 redirect to '/<datetime>/<uri>' when exactly one capture is
        found; otherwise a Response holding an HTML list of captures (the
        body is empty when no capture is found).
    """
    urir = getCompleteURI(urir)
    if ipwbConfig.isLocalHosty(urir):
        # Drop everything up to and including the fourth '/'
        urir = urir.split('/', 4)[4]

    # The SURT form is not used below; the call is retained so that any
    # error it raises for this URI-R still surfaces here.
    surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
    indexFile = ipwbConfig.getIPWBReplayIndexPath()

    print('Getting CDXJ Lines with the URI-R {0} from {1}'.format(
        urir, indexFile))
    captures = getCDXJLinesWithURIR(urir, indexFile)
    captureCount = len(captures)

    if captureCount == 1:
        # A single capture needs no listing, go straight to it
        only = captures[0].split(' ', 2)
        target = '/{1}/{0}'.format(unsurt(only[0]), only[1])
        return redirect(target, code=302)

    if not captures:
        return Response('')

    pieces = ['<p>{0} capture(s) available:</p><ul>'.format(captureCount)]
    for capture in captures:
        parts = capture.split(' ', 2)
        rfc1123 = ipwbConfig.datetimeToRFC1123(parts[1])
        pieces.append('<li><a href="/{1}/{0}">{0} at {2}</a></li>'.format(
            unsurt(parts[0]), parts[1], rfc1123))
    pieces.append('</ul>')
    return Response(''.join(pieces))
Example #2
0
def generateTimeMapFromCDXJLines(cdxjLines, original, tmself):
    """Serialize CDXJ lines as a link-format TimeMap string.

    Args:
        cdxjLines: Iterable of CDXJ lines, each of the form
            '<surt> <datetime> <json>'.
        original: SURT form of the original URI; it is passed through
            unsurt() for the rel="original" link.
        tmself: URI of this TimeMap. It must contain 'timemap/'; the text
            before that marker prefixes every memento URI.

    Returns:
        The links joined by ',\\n', with no trailing separator.

    Raises:
        ValueError: If tmself does not contain 'timemap/'.
    """
    links = [
        '<{0}>; rel="original"'.format(unsurt(original)),
        ('<{0}>; rel="self"; '
         'type="application/link-format"').format(tmself),
    ]

    # Everything ahead of 'timemap/' is the replay root for memento URIs
    replayRoot = tmself[:tmself.index('timemap/')]

    for entry in cdxjLines:
        surtKey, dt14, _ = entry.split(' ', 2)
        rfc1123 = ipwbConfig.datetimeToRFC1123(dt14)
        links.append('<{0}{1}/{2}>; rel="memento"; datetime="{3}"'.format(
            replayRoot, dt14, unsurt(surtKey), rfc1123))

    return ',\n'.join(links)
Example #3
0
def generateCDXJTimeMapFromCDXJLines(cdxjLines, original, tmself):
    """Serialize CDXJ lines as a CDXJ-formatted TimeMap string.

    The output starts with '!context', '!id', '!keys' and two '!meta'
    header lines, followed by one line per memento of the form
    '<datetime> {"uri": ..., "rel": ..., "datetime": ...}'.

    Args:
        cdxjLines: Sized sequence of CDXJ lines, each of the form
            '<surt> <datetime> <json>'.
        original: SURT form of the original URI; unsurt() is applied and
            an 'http://' scheme is prepended.
        tmself: URI of this TimeMap. When app.proxy is set it is replaced
            by the proxied form from getProxiedURIT(). It must contain
            'timemap/'; the text before that marker prefixes every
            memento URI.

    Returns:
        The TimeMap as a string without a trailing line feed.

    Raises:
        ValueError: If the effective tmself does not contain 'timemap/'.
    """
    tmurl = getProxiedURIT(tmself)
    if app.proxy is not None:
        tmself = urlunsplit(tmurl)

    # unsurted URI will never have a scheme, add one
    originalURI = 'http://{0}'.format(unsurt(original))

    tmData = '!context ["http://tools.ietf.org/html/rfc7089"]\n'
    tmData += '!id {{"uri": "{0}"}}\n'.format(tmself)
    tmData += '!keys ["memento_datetime_YYYYMMDDhhmmss"]\n'
    tmData += '!meta {{"original_uri": "{0}"}}\n'.format(originalURI)

    linkTMURI = tmself.replace('/timemap/cdxj/', '/timemap/link/')
    tmData += ('!meta {{"timemap_uri": {{'
               '"link_format": "{0}", '
               '"cdxj_format": "{1}"'
               '}}}}\n').format(linkTMURI, tmself)
    hostAndPort = tmself[0:tmself.index('timemap/')]

    lineCount = len(cdxjLines)
    for i, line in enumerate(cdxjLines):
        # The trailing JSON field of the source line is not needed here
        (surtURI, dt14, _) = line.split(' ', 2)
        dtRFC1123 = ipwbConfig.datetimeToRFC1123(dt14)

        # A lone memento is both first and last; otherwise only the two
        # ends of the list carry a marker.
        firstLastStr = ''
        if lineCount == 1:
            firstLastStr = 'first last '
        elif i == 0:
            firstLastStr = 'first '
        elif i == lineCount - 1:
            firstLastStr = 'last '

        # The datetime member uses ':' so the block is valid JSON
        tmData += ('{1} {{'
                   '"uri": "{0}{1}/{2}", '
                   '"rel": "{3}memento", '
                   '"datetime": "{4}"}}\n').format(hostAndPort, dt14,
                                                   unsurt(surtURI),
                                                   firstLastStr, dtRFC1123)
    tmData = tmData[0:-1]  # Trim final LF
    return tmData
Example #4
0
def generateLinkTimeMapFromCDXJLines(cdxjLines, original, tmself):
    """Serialize CDXJ lines as a link-format TimeMap string.

    Args:
        cdxjLines: Sized sequence of CDXJ lines, each of the form
            '<surt> <datetime> <json>'.
        original: SURT form of the original URI; unsurt() is applied and
            an 'http://' scheme is prepended.
        tmself: URI of this TimeMap. When app.proxy is set it is replaced
            by the proxied form from getProxiedURIT().

    Returns:
        The links joined by ',\\n', with no trailing separator. The first
        and last mementos are marked in their rel value; a lone memento
        carries both markers.
    """
    splitTM = getProxiedURIT(tmself)
    if app.proxy is not None:
        tmself = urlunsplit(splitTM)

    # Blank the path component so only the replay root remains
    splitTM[2] = ''
    replayRoot = urlunsplit(splitTM) + '/'

    # An unsurted URI carries no scheme, so supply one
    originalURI = 'http://{0}'.format(unsurt(original))

    links = [
        '<{0}>; rel="original"'.format(originalURI),
        ('<{0}>; rel="self timemap"; '
         'type="application/link-format"').format(tmself),
        ('<{0}>; rel="timemap"; '
         'type="application/cdxj+ors"').format(
             tmself.replace('/timemap/link/', '/timemap/cdxj/')),
    ]

    total = len(cdxjLines)
    for position, entry in enumerate(cdxjLines):
        surtKey, dt14, _ = entry.split(' ', 2)
        rfc1123 = ipwbConfig.datetimeToRFC1123(dt14)

        if total == 1:
            marker = 'first last '
        elif position == 0:
            marker = 'first '
        elif position == total - 1:
            marker = 'last '
        else:
            marker = ''

        links.append('<{0}{1}/{2}>; rel="{3}memento"; datetime="{4}"'.format(
            replayRoot, dt14, unsurt(surtKey), marker, rfc1123))

    return ',\n'.join(links)
Example #5
0
def show_uri(path, datetime=None):
    """Replay the archived capture of ``path`` from IPFS.

    The capture is located in the replay index by the SURT form of the
    URI (plus ``datetime`` when given), its header and payload are read
    from IPFS, optionally decrypted, and returned with the archived
    headers attached.

    Args:
        path: Requested URI-R. An empty value serves the web UI index and
            a known local script name serves that script.
        datetime: Optional datetime string appended to the index search
            key. It is overwritten with the datetime of the capture that
            was actually found.

    Returns:
        A Response with the replayed payload on success. Otherwise one of:
        the web UI or a local script, a 503 Response when the IPFS daemon
        is not alive, a "not found" Response, the no-mementos interface,
        a 500 Response, or the tuple ``('', 404)`` when the IPFS fetch
        exceeds ten seconds.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    # TODO: Use a better approach to serve static contents
    # instead of using the same logic for every JS file as the SW script
    localScripts = [
        'serviceWorker.js', 'reconstructive.js', 'reconstructive-banner.js'
    ]
    if path in localScripts:
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr, status=503)

    path = getCompleteURI(path)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(path,
                              path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
        print('CDXJ Line: {0}'.format(cdxjLine))

    except Exception:
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    if cdxjLine is None:  # Resource not found in archives
        return generateNoMementosInterface(path, datetime)

    # CDXJ line layout: '<surt> <datetime> <json>'
    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]

    # The last two '/'-separated locator parts are the header and payload
    # digests, in that order
    digests = jObj['locator'].split('/')

    class HashNotFoundError(Exception):
        pass

    def handler(signum, frame):
        raise HashNotFoundError()

    try:
        # Abort the fetch via SIGALRM if it takes longer than ten seconds
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(10)
        try:
            payload = IPFS_API.cat(digests[-1])
            header = IPFS_API.cat(digests[-2])
        finally:
            # Always cancel the pending alarm, otherwise a failed fetch
            # leaves it armed and it fires later in unrelated code
            signal.alarm(0)

    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except TypeError:
        print('A type error occurred')
        print(traceback.format_exc())
        print(sys.exc_info()[0])
        # payload/header are not both bound at this point, so stop here
        # instead of failing further down on an unbound local
        return Response('A type error occurred while fetching from IPFS.',
                        status=500)
    except HashNotFoundError:
        print("Hashes not found")
        return '', 404
    except Exception:
        print('Unknown exception occurred while fetching from ipfs.')
        print(sys.exc_info()[0])
        # NOTE(review): this raises SystemExit from inside a request
        # handler; confirm that terminating is really intended here
        sys.exit()

    if 'encryption_method' in jObj:
        keyString = None
        while keyString is None:
            if 'encryption_key' in jObj:
                keyString = jObj['encryption_key']
            else:
                # Adjacent literals form one prompt string (a comma here
                # would build a tuple instead)
                askForKey = ('Enter a path for file'
                             ' containing decryption key: \n> ')
                keyString = raw_input(askForKey)

        encryptionMethod = None
        if jObj['encryption_method'] == 'xor':
            encryptionMethod = XOR

        if encryptionMethod is None:
            # Only 'xor' is handled above; report anything else instead
            # of dereferencing None
            return Response('Unsupported encryption method: {0}'.format(
                jObj['encryption_method']), status=500)

        # Header and payload were stored base64-encoded after encryption
        pKey = encryptionMethod.new(keyString)
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = encryptionMethod.new(keyString)
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # Discard the first header line (not a 'key: value' pair)

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            try:
                unchunkedPayload = extractResponseFromChunkedData(payload)
            except Exception:
                continue  # Data may not actually have been chunked
            resp.set_data(unchunkedPayload)

        # Every archived header except these two is re-emitted under an
        # X-Archive-Orig- prefix
        if k.lower() not in ["content-type", "content-encoding"]:
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    # Add ipwb header for additional SW logic
    newPayload = resp.get_data()
    ipwbjsinject = """<script src="/webui/webui.js"></script>
                      <script>injectIPWBJS()</script>"""
    newPayload = newPayload.replace('</html>', ipwbjsinject + '</html>')
    resp.set_data(newPayload)

    resp.headers['Memento-Datetime'] = ipwbConfig.datetimeToRFC1123(datetime)

    # Get TimeMap for Link response header
    respWithLinkHeader = getLinkHeaderAbbreviatedTimeMap(path, datetime)
    resp.headers['Link'] = respWithLinkHeader.replace('\n', ' ')

    return resp
Example #6
0
def show_uri(path, datetime=None):
    """Replay the archived capture of ``path`` from IPFS.

    The capture is located in the replay index by the SURT form of the
    URI (plus ``datetime`` when given), its header and payload are read
    from IPFS, optionally XOR-decrypted, and returned with the archived
    headers attached.

    Args:
        path: Requested URI-R. An empty value serves the web UI index and
            'serviceWorker.js' serves the service worker script.
        datetime: Optional datetime string appended to the index search
            key. It is overwritten with the datetime of the capture that
            was actually found.

    Returns:
        A Response with the replayed payload on success. Otherwise one of:
        the web UI or the service worker, a Response saying the IPFS
        daemon is not running, a "not found" Response, or a 404 Response
        listing other captures of the same URI-R.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    if path == 'serviceWorker.js':
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(  # Good ol' pep8 line length
            path,
            path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
    except Exception:
        # Narrowed from a bare except so SystemExit and KeyboardInterrupt
        # are no longer swallowed here
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    if cdxjLine is None:  # Resource not found in archives
        msg = '<h1>ERROR 404</h1>'
        msg += 'No capture found for {0} at {1}.'.format(path, datetime)
        linesWithSameURIR = getCDXJLinesWithURIR(path)

        if linesWithSameURIR:
            # Offer the other captures of the same URI-R as links
            msg += '<p>{0} capture(s) available:</p><ul>'.format(
                len(linesWithSameURIR))
            for line in linesWithSameURIR:
                fields = line.split(' ', 2)
                msg += ('<li><a href="/{1}/{0}">{0} at {1}</a></li>'.format(
                    unsurt(fields[0]), fields[1]))
            msg += '</ul>'
        return Response(msg, status=404)

    # CDXJ line layout: '<surt> <datetime> <json>'
    cdxjParts = cdxjLine.split(" ", 2)

    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]

    # The last two '/'-separated locator parts are the header and payload
    # digests, in that order
    digests = jObj['locator'].split('/')

    try:
        payload = IPFS_API.cat(digests[-1], timeout=1)
        header = IPFS_API.cat(digests[-2])
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
                          path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except Exception:
        print(sys.exc_info()[0])
        print("general error")
        # NOTE(review): this raises SystemExit from inside a request
        # handler; confirm that terminating is really intended here
        sys.exit()

    if 'encryption_method' in jObj:
        # Header and payload were stored base64-encoded after encryption
        pKey = XOR.new(jObj['encryption_key'])
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = XOR.new(jObj['encryption_key'])
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # Discard the first header line (not a 'key: value' pair)

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            resp.set_data(extractResponseFromChunkedData(payload))
        # Every archived header except Content-Type is re-emitted under
        # an X-Archive-Orig- prefix
        if k.lower() != "content-type":
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    resp.headers['Memento-Datetime'] = ipwbConfig.datetimeToRFC1123(datetime)

    return resp