def start(cdxjFilePath=INDEX_FILE, proxy=None):
    """Configure the replay application and run it.

    Args:
        cdxjFilePath: Path of the CDXJ index to replay. When it equals
            ``INDEX_FILE`` and the daemon check succeeds,
            ``ipwbConfig.firstRun()`` is called before the path is stored.
        proxy: Value stored unchanged on ``app.proxy``.

    Raises:
        SystemExit: With status 1 when ``app.run`` raises ``socketerror``
            (reported to the user as the address already being in use).
    """
    hostPort = ipwbConfig.getIPWBReplayConfig()
    app.proxy = proxy

    # Store the default replay IP/port when no configuration was returned.
    if not hostPort:
        ipwbConfig.setIPWBReplayConfig(IPWBREPLAY_IP, IPWBREPLAY_PORT)
        hostPort = ipwbConfig.getIPWBReplayConfig()

    if ipwbConfig.isDaemonAlive():
        if cdxjFilePath == INDEX_FILE:
            ipwbConfig.firstRun()
        ipwbConfig.setIPWBReplayIndexPath(cdxjFilePath)
        app.cdxjFilePath = cdxjFilePath
    else:
        # NOTE: app.cdxjFilePath is left untouched on this path.
        print('Sample data not pulled from IPFS.')
        print('Check that the IPFS daemon is running.')

    try:
        # The banner reports IPWBREPLAY_IP although the app is asked to
        # listen on 0.0.0.0.
        print('IPWB replay started on http://{0}:{1}'.format(
            IPWBREPLAY_IP, IPWBREPLAY_PORT))
        app.run(host='0.0.0.0', port=IPWBREPLAY_PORT)
    except gaierror:
        print('Detected no active Internet connection.')
        print('Overriding to use default IP and port configuration.')
        app.run()
    except socketerror:
        print('Address {0}:{1} already in use!'.format(
            IPWBREPLAY_IP, IPWBREPLAY_PORT))
        # Failing to start is an error: exit non-zero so that shells and
        # supervising processes do not see a successful run.
        sys.exit(1)
def start(cdxjFilePath, proxy=None):
    """Configure the replay application, load the index, and run it.

    Args:
        cdxjFilePath: Path of the CDXJ index to replay. It is passed to
            ``getIndexFileContents`` regardless of the daemon check.
        proxy: Value stored unchanged on ``app.proxy``.

    Raises:
        SystemExit: With status 1 when ``app.run`` raises ``socketerror``
            (reported to the user as the address already being in use).
    """
    hostPort = ipwbUtils.getIPWBReplayConfig()
    app.proxy = proxy

    # Store the default replay host/port when no configuration was returned.
    if not hostPort:
        ipwbUtils.setIPWBReplayConfig(IPWBREPLAY_HOST, IPWBREPLAY_PORT)
        hostPort = ipwbUtils.getIPWBReplayConfig()

    if ipwbUtils.isDaemonAlive():
        ipwbUtils.setIPWBReplayIndexPath(cdxjFilePath)
        app.cdxjFilePath = cdxjFilePath
    else:
        # NOTE: app.cdxjFilePath is left untouched on this path.
        print('Sample data not pulled from IPFS.')
        print('Check that the IPFS daemon is running.')

    # Perform checks for CDXJ file existence, TODO: reuse cached contents
    app.cdxjFileContents = getIndexFileContents(cdxjFilePath)

    try:
        # The banner reports IPWBREPLAY_HOST although the app is asked to
        # listen on 0.0.0.0.
        print('IPWB replay started on http://{0}:{1}'.format(
            IPWBREPLAY_HOST, IPWBREPLAY_PORT))
        app.run(host='0.0.0.0', port=IPWBREPLAY_PORT)
    except gaierror:
        print('Detected no active Internet connection.')
        print('Overriding to use default IP and port configuration.')
        app.run()
    except socketerror:
        print('Address {0}:{1} already in use!'.format(
            IPWBREPLAY_HOST, IPWBREPLAY_PORT))
        # Failing to start is an error: exit non-zero so that shells and
        # supervising processes do not see a successful run.
        sys.exit(1)
def start(cdxjFilePath=INDEX_FILE):
    """Prepare the replay configuration and run the application.

    Args:
        cdxjFilePath: Path of the CDXJ index to replay; stored through
            ``ipwbConfig.setIPWBReplayIndexPath`` and on ``app.cdxjFilePath``.
    """
    replayConfig = ipwbConfig.getIPWBReplayConfig()
    if not replayConfig:
        # Nothing returned: store the default IP/port, then read it back.
        ipwbConfig.setIPWBReplayConfig(IPWBREPLAY_IP, IPWBREPLAY_PORT)
        replayConfig = ipwbConfig.getIPWBReplayConfig()

    ipwbConfig.firstRun()
    ipwbConfig.setIPWBReplayIndexPath(cdxjFilePath)
    app.cdxjFilePath = cdxjFilePath

    serverOptions = {'host': IPWBREPLAY_IP, 'port': IPWBREPLAY_PORT}
    app.run(**serverOptions)
def showTimeMap(urir, format):
    """Return a TimeMap response for ``urir`` in the requested format.

    Args:
        urir: URI-R to look up; it is first passed through
            ``getCompleteURI``.
        format: ``'link'`` or ``'cdxj'``. Any other value produces a
            response with an empty body and an empty Content-Type.

    Returns:
        A ``Response`` whose body is the generated TimeMap and whose
        Content-Type header matches the chosen format.
    """
    urir = getCompleteURI(urir)
    surtedURI = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
    matchingLines = getCDXJLinesWithURIR(
        urir, ipwbUtils.getIPWBReplayIndexPath())

    replayConfig = ipwbUtils.getIPWBReplayConfig()
    timegateURI = 'http://{0}:{1}/timegate/{2}'.format(
        replayConfig[0], replayConfig[1], urir)

    # Defaults are used as-is when ``format`` matches neither branch below.
    body, contentType = '', ''
    if format == 'link':
        contentType = 'application/link-format'
        body = generateLinkTimeMapFromCDXJLines(
            matchingLines, surtedURI, request.url, timegateURI)
    elif format == 'cdxj':
        contentType = 'application/cdxj+ors'
        body = generateCDXJTimeMapFromCDXJLines(
            matchingLines, surtedURI, request.url, timegateURI)

    resp = Response(body)
    resp.headers['Content-Type'] = contentType
    return resp
def getLinkHeaderAbbreviatedTimeMap(urir, pivotDatetime):
    """Build an abbreviated link-format TimeMap around one memento.

    The full TimeMap for ``urir`` is generated, its ``rel="self"`` entry is
    relabelled ``rel="timemap"``, the neighbours of the pivot memento are
    tagged ``next``/``prev``, and every other memento line that is not
    first, last, next, prev, or the pivot itself is blanked out.

    Args:
        urir: URI-R whose index lines are looked up.
        pivotDatetime: String searched for in each TimeMap line to find the
            pivot memento.

    Returns:
        The abbreviated TimeMap as a newline-joined string. Blanked
        mementos remain as empty lines.
    """
    mementoPattern = 'rel=.*memento"'
    keptRelPattern = 'rel=.*(next|prev|first|last)'

    surtedURI = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
    matchingLines = getCDXJLinesWithURIR(
        urir, ipwbConfig.getIPWBReplayIndexPath())
    hostAndPort = ipwbConfig.getIPWBReplayConfig()

    tmURI = 'http://{0}:{1}/timemap/link/{2}'.format(
        'localhost',  # hostAndPort[0],
        hostAndPort[1], urir)

    # Fix base TM relation when viewing abbrev version in Link resp
    tm = generateLinkTimeMapFromCDXJLines(
        matchingLines, surtedURI, tmURI
    ).replace('rel="self"', 'rel="timemap"')

    # Only one memento in TimeMap
    if 'rel="first last memento"' in tm:
        return tm

    tmLines = tm.split('\n')
    lastIdx = len(tmLines) - 1

    # Find the pivot memento and label the lines adjacent to it.
    for idx, line in enumerate(tmLines):
        if not re.findall(mementoPattern, line):
            continue  # Not a memento
        if pivotDatetime not in line:
            continue

        hasBothNeighbours = 0 < idx < lastIdx
        if hasBothNeighbours or idx == 0:
            tmLines[idx + 1] = tmLines[idx + 1].replace(
                'memento"', 'next memento"')
        if hasBothNeighbours or idx == lastIdx:
            tmLines[idx - 1] = tmLines[idx - 1].replace(
                'memento"', 'prev memento"')
        break  # Only the first line containing the pivot is handled

    # Remove all mementos in abbrev TM that are not:
    #   first, last, prev, next, or pivot
    for idx, line in enumerate(tmLines):
        if (re.findall(mementoPattern, line)
                and pivotDatetime not in line
                and not re.findall(keptRelPattern, line)):
            tmLines[idx] = ''

    return '\n'.join(tmLines)
return closest def getCDXLines(surtURI): with open('index.cdx', 'r') as cdxFile: cdxlobj = [] bsResp = iter_exact(cdxFile, surtURI) for cdxl in bsResp: (suri, dttm, jobj) = cdxl.split(' ', 2) if suri != surtURI: break cdxlobj.append((suri, dttm, jobj)) return cdxlobj ''' if __name__ == "__main__": hostPort = ipwbConfig.getIPWBReplayConfig() if not hostPort: ipwbConfig.setIPWBReplayConfig(IPWBREPLAY_IP, IPWBREPLAY_PORT) hostPort = ipwbConfig.getIPWBReplayConfig() # print hostPort # sys.exit() app.run(host=IPWBREPLAY_IP, port=IPWBREPLAY_PORT) # Read in URI, convert to SURT # surt(uriIn) # Get SURTed URI lines in CDXJ # Read CDXJ # Do bin search to find relevant lines # read IPFS hash from relevant lines (header, payload)