def _convert_record(a, w):
    """Read the next ARC record from *a* and store it in *w* as a WARC record.

    Prints a diagnostic and returns False on any failure; returns True when
    the record was stored.  The ARC record and the WARC record created here
    are destroyed before returning, on every path.
    """
    ar = a.nextRecord()
    if ar is None:
        print("bad ARC file")
        return False

    wr = None
    try:
        wr = WRecord()
        if wr is None:
            print("can not create WARC record object")
            return False

        wr.setRecordType(warc.WARC_RESPONSE_RECORD)

        # Copy the ARC header fields over to the WARC record.
        uri = ar.getUrl()
        wr.setTargetUri(uri, len(uri))

        date = ar.getCreationDate()
        wr.setDateFromArc(date, len(date))

        mime = ar.getMimeType()
        wr.setContentType(mime, len(mime))

        ip = ar.getIpAddress()
        wr.setIpAddress(ip, len(ip))

        # The record id is "uuid:" + SHA-1 hex digest of the URI and the
        # current time.  The format string carries a literal "Z" (UTC)
        # suffix, so the timestamp is taken from gmtime(), not localtime().
        s = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        rid = "uuid:" + sha.new(uri + s).hexdigest()
        wr.setRecordId(rid, len(rid))

        # Both calls are treated as failed when they return a true value.
        if ar.transferContent(wr, a):
            print("Unable to pass content to the WRecord")
            return False

        if w.storeRecord(wr):
            print("failed to write WRecord")
            return False

        return True
    finally:
        # Same order as the original success path: ARC record first, then
        # the WARC record.  This also covers the failure paths, which
        # previously leaked the WARC record.
        ar.destroy()
        if wr is not None:
            wr.destroy()


def convert(fname, outfname, tmpdir, cmode):
    """Convert the ARC file *fname* into a WARC file written to *outfname*.

    Args:
        fname: path of the ARC file to read (opened with compression
            auto-detection).
        outfname: path of the WARC file to write.
        tmpdir: working directory handed to both the ARC reader and the
            WARC writer.
        cmode: truthy to write a gzip-compressed WARC file, falsy to write
            it uncompressed.

    Returns:
        None.  Failures are reported by printing a message and returning
        early; conversion stops at the first record that fails.
    """
    # Single-argument print(...) is used throughout: it prints the same text
    # whether `print` is a statement or a function.
    a = AFile(fname, arc.ARC_FILE_DETECT_COMPRESSION, tmpdir)
    if not a:
        print("ARC file not found ")
        return

    if cmode:
        cmode = warc.WARC_FILE_COMPRESSED_GZIP
    else:
        cmode = warc.WARC_FILE_UNCOMPRESSED

    w = None
    try:
        # NOTE(review): the second argument (16 GiB) is presumably the
        # maximum size of the WARC file -- confirm against the WFile binding.
        w = WFile(outfname, 16 * 1024 * 1024 * 1024,
                  warc.WARC_FILE_WRITER, cmode, tmpdir)
        if w is None:
            print("given temporary directory does not exist ")
            return

        while a.hasMoreRecords():
            if not _convert_record(a, w):
                return
    finally:
        # Release the file handles on every exit, including exceptions
        # raised by the bindings.
        a.destroy()
        if w is not None:
            w.destroy()