Example #1
0
def start(cdxjFilePath=INDEX_FILE, proxy=None):
    """Configure the replay service, then run the web application.

    Args:
        cdxjFilePath: Path of the CDXJ index to replay. When it is left at
            INDEX_FILE, ipwbConfig.firstRun() is invoked before the path is
            recorded.
        proxy: Stored on ``app.proxy``; not otherwise used in this function.

    Raises:
        SystemExit: With status 1 when a socket error is raised while
            starting the server.
    """
    hostPort = ipwbConfig.getIPWBReplayConfig()
    app.proxy = proxy

    # Persist the default host and port when no replay config is stored yet.
    if not hostPort:
        ipwbConfig.setIPWBReplayConfig(IPWBREPLAY_IP, IPWBREPLAY_PORT)
        hostPort = ipwbConfig.getIPWBReplayConfig()

    if ipwbConfig.isDaemonAlive():
        # Only the default index triggers firstRun(); presumably that is
        # what pulls the sample data mentioned below -- TODO confirm.
        if cdxjFilePath == INDEX_FILE:
            ipwbConfig.firstRun()
        ipwbConfig.setIPWBReplayIndexPath(cdxjFilePath)
        app.cdxjFilePath = cdxjFilePath
    else:
        # The server is still started below; only the index set-up is skipped.
        print('Sample data not pulled from IPFS.')
        print('Check that the IPFS daemon is running.')

    try:
        # The banner shows the configured IP, while the server itself
        # listens on all interfaces (0.0.0.0).
        print('IPWB replay started on http://{0}:{1}'.format(
            IPWBREPLAY_IP, IPWBREPLAY_PORT))
        app.run(host='0.0.0.0', port=IPWBREPLAY_PORT)
    except gaierror:
        print('Detected no active Internet connection.')
        print('Overriding to use default IP and port configuration.')
        app.run()
    except socketerror:
        print('Address {0}:{1} already in use!'.format(IPWBREPLAY_IP,
                                                       IPWBREPLAY_PORT))
        # A failed start must not report success to the calling shell.
        sys.exit(1)
Example #2
0
def start(cdxjFilePath, proxy=None):
    """Configure the replay service, load the index, and run the web app.

    Args:
        cdxjFilePath: Path of the CDXJ index to replay.
        proxy: Stored on ``app.proxy``; not otherwise used in this function.

    Raises:
        SystemExit: With status 1 when a socket error is raised while
            starting the server.
    """
    hostPort = ipwbUtils.getIPWBReplayConfig()
    app.proxy = proxy

    # Persist the default host and port when no replay config is stored yet.
    if not hostPort:
        ipwbUtils.setIPWBReplayConfig(IPWBREPLAY_HOST, IPWBREPLAY_PORT)
        hostPort = ipwbUtils.getIPWBReplayConfig()

    if ipwbUtils.isDaemonAlive():
        ipwbUtils.setIPWBReplayIndexPath(cdxjFilePath)
        app.cdxjFilePath = cdxjFilePath
    else:
        # The server is still started below; only the index path is not
        # recorded.
        print('Sample data not pulled from IPFS.')
        print('Check that the IPFS daemon is running.')

    # Perform checks for CDXJ file existence, TODO: reuse cached contents
    # This runs whether or not the daemon check above succeeded.
    app.cdxjFileContents = getIndexFileContents(cdxjFilePath)

    try:
        # The banner shows the configured host, while the server itself
        # listens on all interfaces (0.0.0.0).
        print('IPWB replay started on http://{0}:{1}'.format(
            IPWBREPLAY_HOST, IPWBREPLAY_PORT))
        app.run(host='0.0.0.0', port=IPWBREPLAY_PORT)
    except gaierror:
        print('Detected no active Internet connection.')
        print('Overriding to use default IP and port configuration.')
        app.run()
    except socketerror:
        print('Address {0}:{1} already in use!'.format(IPWBREPLAY_HOST,
                                                       IPWBREPLAY_PORT))
        # A failed start must not report success to the calling shell.
        sys.exit(1)
Example #3
0
def start(cdxjFilePath=INDEX_FILE):
    """Prepare the replay configuration and run the web application.

    Args:
        cdxjFilePath: Path of the CDXJ index to replay; defaults to
            INDEX_FILE.
    """
    replayConfig = ipwbConfig.getIPWBReplayConfig()
    if not replayConfig:
        # No host/port stored yet: write the defaults and read them back.
        ipwbConfig.setIPWBReplayConfig(IPWBREPLAY_IP, IPWBREPLAY_PORT)
        replayConfig = ipwbConfig.getIPWBReplayConfig()

    # firstRun() is invoked on every start, before the index path is stored.
    ipwbConfig.firstRun()
    ipwbConfig.setIPWBReplayIndexPath(cdxjFilePath)
    app.cdxjFilePath = cdxjFilePath

    # The server is bound with the constants, not with the value read above.
    app.run(
        host=IPWBREPLAY_IP,
        port=IPWBREPLAY_PORT,
    )
Example #4
0
def showTimeMap(urir, format):
    """Return a Response carrying the TimeMap for ``urir``.

    Args:
        urir: URI-R whose index entries are listed; it is passed through
            getCompleteURI() before use.
        format: 'link' or 'cdxj'. Any other value yields an empty body with
            an empty Content-Type header.

    Returns:
        A Response whose body is the generated TimeMap and whose
        Content-Type matches the requested format.
    """
    urir = getCompleteURI(urir)
    surtedURI = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
    cdxjPath = ipwbUtils.getIPWBReplayIndexPath()
    matchingLines = getCDXJLinesWithURIR(urir, cdxjPath)

    replayConfig = ipwbUtils.getIPWBReplayConfig()
    tgURI = 'http://{0}:{1}/timegate/{2}'.format(
        replayConfig[0], replayConfig[1], urir)

    # Choose the generator and media type first, then build the body once.
    if format == 'link':
        buildTimeMap = generateLinkTimeMapFromCDXJLines
        contentType = 'application/link-format'
    elif format == 'cdxj':
        buildTimeMap = generateCDXJTimeMapFromCDXJLines
        contentType = 'application/cdxj+ors'
    else:
        buildTimeMap = None
        contentType = ''

    body = ''
    if buildTimeMap is not None:
        body = buildTimeMap(matchingLines, surtedURI, request.url, tgURI)

    resp = Response(body)
    resp.headers['Content-Type'] = contentType
    return resp
Example #5
0
def getLinkHeaderAbbreviatedTimeMap(urir, pivotDatetime):
    """Build the abbreviated link-format TimeMap around a pivot memento.

    The full link TimeMap for ``urir`` is generated, its ``rel="self"``
    relation is rewritten to ``rel="timemap"``, the lines adjacent to the
    pivot are tagged ``next``/``prev``, and every other memento line that is
    not marked first, last, next or prev is blanked out.

    Args:
        urir: URI-R whose mementos are listed.
        pivotDatetime: Text identifying the pivot memento; the first memento
            line containing it is treated as the pivot.

    Returns:
        The abbreviated TimeMap as a newline-joined string. Dropped mementos
        are replaced by empty strings, so blank lines remain in the result.
    """
    surtedURI = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
    cdxjPath = ipwbConfig.getIPWBReplayIndexPath()
    matchingLines = getCDXJLinesWithURIR(urir, cdxjPath)
    replayConfig = ipwbConfig.getIPWBReplayConfig()

    # The host is pinned to localhost; only the port comes from the config.
    tmURI = 'http://{0}:{1}/timemap/link/{2}'.format(
        'localhost', replayConfig[1], urir)
    tm = generateLinkTimeMapFromCDXJLines(matchingLines, surtedURI, tmURI)

    # Fix base TM relation when viewing abbrev version in Link resp
    tm = tm.replace('rel="self"', 'rel="timemap"')

    # A TimeMap with a single memento needs no abbreviation.
    if 'rel="first last memento"' in tm:
        return tm

    def isMemento(entry):
        # True when the line carries a rel value ending in memento".
        return bool(re.findall('rel=.*memento"', entry))

    entries = tm.split('\n')
    finalPos = len(entries) - 1

    # Tag the lines adjacent to the pivot as next/prev.
    for pos, entry in enumerate(entries):
        if not isMemento(entry) or pivotDatetime not in entry:
            continue

        isInterior = 0 < pos < finalPos
        if isInterior or pos == 0:
            following = entries[pos + 1]
            entries[pos + 1] = following.replace('memento"', 'next memento"')
        if isInterior or pos == finalPos:
            preceding = entries[pos - 1]
            entries[pos - 1] = preceding.replace('memento"', 'prev memento"')
        break  # Only the first pivot match is considered

    # Blank out mementos that are not first, last, prev, next, or the pivot.
    for pos, entry in enumerate(entries):
        if not isMemento(entry) or pivotDatetime in entry:
            continue
        if not re.findall('rel=.*(next|prev|first|last)', entry):
            entries[pos] = ''

    return '\n'.join(entries)
Example #6
0
    return closest


def getCDXLines(surtURI):
    """Collect the index.cdx entries whose first field equals ``surtURI``.

    Args:
        surtURI: SURT-form URI to look up.

    Returns:
        A list of (surt, datetime, json) string tuples, in the order yielded
        by iter_exact(); collection stops at the first entry whose SURT
        differs from ``surtURI``.
    """
    matches = []
    with open('index.cdx', 'r') as cdxFile:
        for entry in iter_exact(cdxFile, surtURI):
            # Each entry is split into three space-separated fields; the
            # third keeps any remaining spaces.
            entrySurt, entryDatetime, entryJSON = entry.split(' ', 2)
            if entrySurt != surtURI:
                break
            matches.append((entrySurt, entryDatetime, entryJSON))
    return matches
'''
if __name__ == "__main__":
    # Script entry point: make sure a replay host/port is configured, then
    # run the web application.
    hostPort = ipwbConfig.getIPWBReplayConfig()
    if not hostPort:
        # Nothing stored yet: save the defaults and read the config back.
        ipwbConfig.setIPWBReplayConfig(IPWBREPLAY_IP, IPWBREPLAY_PORT)
        hostPort = ipwbConfig.getIPWBReplayConfig()
    # Debugging aid, left disabled:
    # print hostPort
    # sys.exit()
    # NOTE: hostPort is not read again; the server is started with the
    # IPWBREPLAY_IP / IPWBREPLAY_PORT constants rather than the stored value.
    app.run(host=IPWBREPLAY_IP, port=IPWBREPLAY_PORT)

# Read in URI, convert to SURT
#  surt(uriIn)
# Get SURTed URI lines in CDXJ
#  Read CDXJ
#  Do bin search to find relevant lines

# read IPFS hash from relevant lines (header, payload)