Ejemplo n.º 1
0
    def write_warc(self, resources=None, dumpfile=None):
        """Write a WARC dump file.

        WARC support is not part of ResourceSync v1.0 (Z39.99 2014) but is left
        in this library for experimentation.

        One WARC record is written for every entry in ``resources``; once the
        file has been closed its size on disk is logged.

        Arguments:
            resources: iterable of resource objects, each providing ``uri``,
                ``lastmod`` and ``path`` attributes.
            dumpfile: path of the WARC file to create.

        Raises:
            DumpError: if the optional ``warc`` library cannot be imported.
        """
        # Load library late as we want to be able to run rest of code
        # without this installed. Only an import failure is translated into
        # DumpError so that KeyboardInterrupt/SystemExit are not swallowed.
        try:
            from warc import WARCFile, WARCHeader, WARCRecord
        except ImportError:
            raise DumpError("Failed to load WARC library")
        wf = WARCFile(dumpfile, mode="w", compress=self.compress)
        # Close the file even when building or writing a record fails so the
        # handle is never leaked.
        try:
            # Add all files in the resources
            for resource in resources:
                wh = WARCHeader({})
                wh.url = resource.uri
                wh.ip_address = None
                wh.date = resource.lastmod
                wh.content_type = 'text/plain'
                wh.result_code = 200
                # NOTE(review): fixed placeholder value, not a digest computed
                # from the resource.
                wh.checksum = 'aabbcc'
                wh.location = self.archive_path(resource.path)
                # NOTE(review): the payload passed is resource.path itself,
                # presumably a filesystem path rather than the file content;
                # confirm this is intended.
                wf.write_record(WARCRecord(header=wh, payload=resource.path))
        finally:
            wf.close()
        warcsize = os.path.getsize(dumpfile)
        self.logging.info("Wrote WARC file dump %s with size %d bytes" %
                          (dumpfile, warcsize))
Ejemplo n.º 2
0
Archivo: dump.py Proyecto: EHRI/resync
    def write_warc(self, resources=None, dumpfile=None):
        """Write a WARC dump file.

        WARC support is not part of ResourceSync v1.0 (Z39.99 2014) but is left
        in this library for experimentation.

        One WARC record is written for every entry in ``resources``; once the
        file has been closed its size on disk is logged.

        Arguments:
            resources: iterable of resource objects, each providing ``uri``,
                ``lastmod`` and ``path`` attributes.
            dumpfile: path of the WARC file to create.

        Raises:
            DumpError: if the optional ``warc`` library cannot be imported.
        """
        # Load library late as we want to be able to run rest of code
        # without this installed. Only an import failure is translated into
        # DumpError so that KeyboardInterrupt/SystemExit are not swallowed.
        try:
            from warc import WARCFile, WARCHeader, WARCRecord
        except ImportError:
            raise DumpError("Failed to load WARC library")
        wf = WARCFile(dumpfile, mode="w", compress=self.compress)
        # Close the file even when building or writing a record fails so the
        # handle is never leaked.
        try:
            # Add all files in the resources
            for resource in resources:
                wh = WARCHeader({})
                wh.url = resource.uri
                wh.ip_address = None
                wh.date = resource.lastmod
                wh.content_type = 'text/plain'
                wh.result_code = 200
                # NOTE(review): fixed placeholder value, not a digest computed
                # from the resource.
                wh.checksum = 'aabbcc'
                wh.location = self.archive_path(resource.path)
                # NOTE(review): the payload passed is resource.path itself,
                # presumably a filesystem path rather than the file content;
                # confirm this is intended.
                wf.write_record(WARCRecord(header=wh, payload=resource.path))
        finally:
            wf.close()
        warcsize = os.path.getsize(dumpfile)
        self.logging.info("Wrote WARC file dump %s with size %d bytes" %
                          (dumpfile, warcsize))
Ejemplo n.º 3
0
 def write_warc(self, inventory=None, dumpfile=None):
     """Write a WARC dump file.

     One WARC record is written for every entry in ``inventory``; once the
     file has been closed its size on disk is printed to standard output.

     Arguments:
         inventory: iterable of resource objects, each providing ``uri``,
             ``lastmod`` and ``file`` attributes.
         dumpfile: path of the WARC file to create.

     Raises:
         DumpError: if the optional ``warc`` library cannot be imported.
     """
     # Load library late as we want to be able to run rest of code
     # without this installed. Only an import failure is translated into
     # DumpError so that KeyboardInterrupt/SystemExit are not swallowed.
     try:
         from warc import WARCFile, WARCHeader, WARCRecord
     except ImportError:
         raise DumpError("Failed to load WARC library")
     wf = WARCFile(dumpfile, mode="w", compress=self.compress)
     # Close the file even when building or writing a record fails so the
     # handle is never leaked.
     try:
         # Add all files in the inventory
         for resource in inventory:
             wh = WARCHeader({})
             wh.url = resource.uri
             wh.ip_address = None
             wh.date = resource.lastmod
             wh.content_type = 'text/plain'
             wh.result_code = 200
             # NOTE(review): checksum and location are fixed placeholder
             # values, not derived from the resource.
             wh.checksum = 'aabbcc'
             wh.location = 'loc'
             # NOTE(review): the payload passed is resource.file itself,
             # presumably a filesystem path rather than the file content;
             # confirm this is intended.
             wf.write_record(WARCRecord(header=wh, payload=resource.file))
     finally:
         wf.close()
     warcsize = os.path.getsize(dumpfile)
     # A single parenthesised argument prints identically under the Python 2
     # print statement and the Python 3 print function.
     print("Wrote WARC file dump %s with size %d bytes" % (dumpfile, warcsize))
Ejemplo n.º 4
0
Archivo: dump.py Proyecto: pedak/resdbp
 def write_warc(self, inventory=None, dumpfile=None):
     """Write a WARC dump file.

     One WARC record is written for every entry in ``inventory``; once the
     file has been closed its size on disk is printed to standard output.

     Arguments:
         inventory: iterable of resource objects, each providing ``uri``,
             ``lastmod`` and ``file`` attributes.
         dumpfile: path of the WARC file to create.

     Raises:
         DumpError: if the optional ``warc`` library cannot be imported.
     """
     # Load library late as we want to be able to run rest of code
     # without this installed. Only an import failure is translated into
     # DumpError so that KeyboardInterrupt/SystemExit are not swallowed.
     try:
         from warc import WARCFile, WARCHeader, WARCRecord
     except ImportError:
         raise DumpError("Failed to load WARC library")
     wf = WARCFile(dumpfile, mode="w", compress=self.compress)
     # Close the file even when building or writing a record fails so the
     # handle is never leaked.
     try:
         # Add all files in the inventory
         for resource in inventory:
             wh = WARCHeader({})
             wh.url = resource.uri
             wh.ip_address = None
             wh.date = resource.lastmod
             wh.content_type = 'text/plain'
             wh.result_code = 200
             # NOTE(review): checksum and location are fixed placeholder
             # values, not derived from the resource.
             wh.checksum = 'aabbcc'
             wh.location = 'loc'
             # NOTE(review): the payload passed is resource.file itself,
             # presumably a filesystem path rather than the file content;
             # confirm this is intended.
             wf.write_record(WARCRecord(header=wh, payload=resource.file))
     finally:
         wf.close()
     warcsize = os.path.getsize(dumpfile)
     # A single parenthesised argument prints identically under the Python 2
     # print statement and the Python 3 print function.
     print("Wrote WARC file dump %s with size %d bytes" % (dumpfile, warcsize))