class IPFSWARCRecorder(BaseWARCRecorder):
    """WARC recorder that stores each response record in IPFS.

    ``write_records`` writes the response to a temporary ``.warc.gz`` file
    under ``warcdir``, adds that file to IPFS, indexes the resulting
    ``ipfs://<hash>`` path through a ``RedisIndexer`` and finally removes
    the temporary file.
    """

    def __init__(self, warcdir, ipfs, redis):
        """Set up the recorder and make sure the working directory exists.

        :param warcdir: directory used for the temporary WARC files.
        :param ipfs: client object; only its ``add(stream)`` method is used.
        :param redis: handle passed straight through to ``RedisIndexer``.
        :raises OSError: if ``warcdir`` does not exist and cannot be created.
        """
        super(IPFSWARCRecorder, self).__init__()

        self.warcdir = warcdir
        self.ipfs = ipfs
        self.redisindex = RedisIndexer(redis, 'ipfs:cdxj')

        # Experimental dedup support is currently disabled; to enable it:
        #     self.dedup = self.redisindex

        try:
            os.makedirs(warcdir)
        except OSError:
            # An already-existing directory is fine; anything else (e.g. a
            # permission problem) would make every later write fail anyway,
            # so surface it here instead of silently swallowing it.
            if not os.path.isdir(warcdir):
                raise

    def write_records(self):
        """Write the response record, publish it to IPFS and index it.

        The record is staged in ``<warcdir>/<uuid>.warc.gz``. The staging
        file is always removed afterwards, even if writing, the IPFS add or
        the indexing step raises.

        A failed IPFS add (falsy result) is reported on stdout and the
        record is not indexed.
        """
        resp_uuid = str(uuid.uuid1())
        resp_id = self._make_warc_id(resp_uuid)

        filename = os.path.join(self.warcdir, resp_uuid + '.warc.gz')

        try:
            # Binary mode: the file holds gzip data, which text mode would
            # reject (Python 3) or corrupt via newline translation (Windows).
            # NOTE(review): assumes _write_warc_response emits bytes --
            # confirm against BaseWARCRecorder.
            with open(filename, 'wb') as out:
                self._write_warc_response(out, warc_id=resp_id)

            # NOTE: the 'request' record is intentionally not written for
            # now. When it is re-enabled it needs its own uuid/warc id and
            # should pass concur_id=resp_id to _write_warc_request.

            with open(filename, 'rb') as raw:
                # Wrap the file so it is presented under the url-quoted
                # capture URL rather than the temporary file name.
                stream = CustomNameStream(raw, quote_plus(self.url))

                res = self.ipfs.add(stream)
                if not res:
                    print('IPFS ADD FAILED')
                else:
                    path = 'ipfs://' + res['Hash']
                    self.redisindex.add_record(stream, path)
        finally:
            # Never leave the staging file behind; the existence check keeps
            # a failed open() from being masked by a second error here.
            if os.path.exists(filename):
                os.remove(filename)
def __init__(self, warcdir, ipfs, redis):
    """Set up the recorder and make sure the working directory exists.

    NOTE(review): in this view the definition sits at module level and
    repeats the initializer of ``IPFSWARCRecorder`` -- confirm whether it
    is meant to live inside that class.

    :param warcdir: directory used for the temporary WARC files.
    :param ipfs: client object stored on the instance as ``self.ipfs``.
    :param redis: handle passed straight through to ``RedisIndexer``.
    :raises OSError: if ``warcdir`` does not exist and cannot be created.
    """
    super(IPFSWARCRecorder, self).__init__()

    self.warcdir = warcdir
    self.ipfs = ipfs
    self.redisindex = RedisIndexer(redis, 'ipfs:cdxj')

    # Experimental dedup support is currently disabled; to enable it:
    #     self.dedup = self.redisindex

    try:
        os.makedirs(warcdir)
    except OSError:
        # An already-existing directory is fine; any other failure is a
        # real problem and must not be silently swallowed.
        if not os.path.isdir(warcdir):
            raise