def write_warc(self, resources=None, dumpfile=None):
    """Write a WARC dump file.

    WARC support is not part of ResourceSync v1.0 (Z39.99 2014) but is left
    in this library for experimentation.

    One WARC record is written per entry in resources, then the size of
    the finished file is reported through self.logging.

    Arguments:
    resources -- iterable of resource objects providing uri, lastmod and
        path attributes. The default of None is not iterable, so a value
        must be supplied by the caller.
    dumpfile -- name of the WARC file to write.

    Raises DumpError if the warc library cannot be loaded.
    """
    # Load library late as we want to be able to run rest of code
    # without this installed
    try:
        from warc import WARCFile, WARCHeader, WARCRecord
    except Exception:
        # Any failure while importing is reported as DumpError, but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        raise DumpError("Failed to load WARC library")
    wf = WARCFile(dumpfile, mode="w", compress=self.compress)
    try:
        # Add all files in the resources
        for resource in resources:
            wh = WARCHeader({})
            wh.url = resource.uri
            wh.ip_address = None
            wh.date = resource.lastmod
            wh.content_type = 'text/plain'
            wh.result_code = 200
            # NOTE(review): hard-coded placeholder, not derived from the
            # resource -- confirm whether a real checksum is wanted.
            wh.checksum = 'aabbcc'
            wh.location = self.archive_path(resource.path)
            # NOTE(review): the payload handed over is the path value
            # itself; how the warc library treats it is not visible here.
            wf.write_record(WARCRecord(header=wh, payload=resource.path))
    finally:
        # Release the file handle even if a record could not be written
        wf.close()
    warcsize = os.path.getsize(dumpfile)
    self.logging.info("Wrote WARC file dump %s with size %d bytes" %
                      (dumpfile, warcsize))
def write_warc(self, resources=None, dumpfile=None):
    """Write a WARC dump file.

    WARC support is not part of ResourceSync v1.0 (Z39.99 2014) but is left
    in this library for experimentation.

    Each entry in resources becomes one WARC record in dumpfile. Once the
    file has been closed its size is read back and logged through
    self.logging.

    Arguments:
    resources -- iterable of resource objects; the uri, lastmod and path
        attributes of each are read. The default of None cannot be
        iterated, so callers have to pass a value.
    dumpfile -- name of the WARC file to create.

    Raises DumpError if the warc library cannot be loaded.
    """
    # Load library late as we want to be able to run rest of code
    # without this installed
    try:
        from warc import WARCFile, WARCHeader, WARCRecord
    except Exception:
        # Convert every import-time failure to DumpError while letting
        # KeyboardInterrupt and SystemExit propagate untouched.
        raise DumpError("Failed to load WARC library")
    wf = WARCFile(dumpfile, mode="w", compress=self.compress)
    try:
        # Add all files in the resources
        for resource in resources:
            wh = WARCHeader({})
            wh.url = resource.uri
            wh.ip_address = None
            wh.date = resource.lastmod
            wh.content_type = 'text/plain'
            wh.result_code = 200
            # NOTE(review): fixed placeholder value rather than a digest
            # of the resource -- confirm this is intentional.
            wh.checksum = 'aabbcc'
            wh.location = self.archive_path(resource.path)
            # NOTE(review): resource.path is passed as the payload; what
            # the warc library does with it cannot be seen from here.
            record = WARCRecord(header=wh, payload=resource.path)
            wf.write_record(record)
    finally:
        # Always close the WARC file, including when a write fails
        wf.close()
    warcsize = os.path.getsize(dumpfile)
    self.logging.info("Wrote WARC file dump %s with size %d bytes" %
                      (dumpfile, warcsize))
def write_warc(self, inventory=None, dumpfile=None):
    """Write a WARC dump file.

    One WARC record is written for every entry in inventory, and a
    summary line giving the resulting file size is printed afterwards.

    Arguments:
    inventory -- iterable of resource objects; the uri, lastmod and file
        attributes of each are read. The default of None is not iterable,
        so a value must be supplied by the caller.
    dumpfile -- name of the WARC file to write.

    Raises DumpError if the warc library cannot be loaded.
    """
    # Load library late as we want to be able to run rest of code
    # without this installed
    try:
        from warc import WARCFile, WARCHeader, WARCRecord
    except Exception:
        # Any failure while importing is reported as DumpError, but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        raise DumpError("Failed to load WARC library")
    wf = WARCFile(dumpfile, mode="w", compress=self.compress)
    try:
        # Add all files in the inventory
        for resource in inventory:
            wh = WARCHeader({})
            wh.url = resource.uri
            wh.ip_address = None
            wh.date = resource.lastmod
            wh.content_type = 'text/plain'
            wh.result_code = 200
            # NOTE(review): checksum and location are fixed placeholder
            # strings, not derived from the resource -- confirm intent.
            wh.checksum = 'aabbcc'
            wh.location = 'loc'
            wf.write_record(WARCRecord(header=wh, payload=resource.file))
    finally:
        # Release the file handle even if a record could not be written
        wf.close()
    warcsize = os.path.getsize(dumpfile)
    # A single parenthesised argument prints identically under Python 2
    # and Python 3, where the bare print statement is a syntax error.
    print("Wrote WARC file dump %s with size %d bytes" % (dumpfile, warcsize))