예제 #1
0
파일: test_dump.py 프로젝트: pedak/sync-oai
 def test00_dump_creation(self):
     """Run Dump.check_files over a two-entry Inventory and check total_size.

     The entries declare sizes 1 and 2, yet the expected total is 28 --
     presumably check_files measures the files on disk; confirm against Dump.
     """
     inventory = Inventory()
     for uri, size, local in (
             ('http://ex.org/a', 1, 'resync/test/testdata/a'),
             ('http://ex.org/b', 2, 'resync/test/testdata/b')):
         inventory.add(ResourceFile(uri, size=size, file=local))
     dump = Dump()
     dump.check_files(inventory=inventory)
     self.assertEqual(dump.total_size, 28)
예제 #2
0
 def test04_dump_multi_file_max_size(self):
     """Write twelve resources with max_size=2000 and inspect the two zips.

     The first zip is expected to hold manifest.xml plus a-f, the second
     manifest.xml plus g-l. Both zips are removed after inspection.
     """
     rl = ResourceList()
     for letter in 'abcdefghijkl':
         rl.add(Resource('http://ex.org/%s' % (letter),
                         path='tests/testdata/a_to_z/%s' % (letter)))
     self.assertEqual(len(rl), 12)
     dumper = Dump(rl)
     dumper.max_size = 2000
     tmpbase = os.path.join(self.tmpdir, 'test0f_')
     first_zip = tmpbase + '00000.zip'
     second_zip = tmpbase + '00001.zip'
     written = dumper.write(tmpbase)
     self.assertEqual(written, 2, 'expect to write 2 dump files')
     for zip_path in (first_zip, second_zip):
         self.assertTrue(os.path.isfile(zip_path))
     # First zip: check member order and the size recorded for each member
     archive = zipfile.ZipFile(first_zip, 'r')
     self.assertEqual(archive.namelist(), ['manifest.xml'] + list('abcdef'))
     expected_sizes = (('a', 9), ('b', 1116), ('c', 32),
                       ('d', 13), ('e', 20), ('f', 1625))
     for member, size in expected_sizes:
         self.assertEqual(archive.getinfo(member).file_size, size)
     archive.close()
     os.unlink(first_zip)
     # Second zip: only the member list is checked
     archive = zipfile.ZipFile(second_zip, 'r')
     self.assertEqual(archive.namelist(), ['manifest.xml'] + list('ghijkl'))
     archive.close()
     os.unlink(second_zip)
예제 #3
0
    def write_resource_list(self,
                            paths=None,
                            outfile=None,
                            links=None,
                            dump=None):
        """Write a Resource List or a Resource Dump for files on local disk.

        The resource list is built with self.build_resource_list(), passing
        paths through and using dump as the set_path flag. If links is not
        None it is stored on the list as its ln attribute.

        If dump is true then a Resource Dump is written instead of a Resource
        List; when outfile is not set, self.default_resource_dump is used as
        the destination. Otherwise the Resource List is printed to stdout as
        XML, or written with basename outfile when outfile is given.

        Raises ClientFatalError if printing to stdout fails with a
        ListBaseIndexError.
        """
        rl = self.build_resource_list(paths=paths, set_path=dump)
        if links is not None:
            rl.ln = links
        if dump:
            if outfile is None:
                outfile = self.default_resource_dump
            # Report the destination file; dump itself is only a flag.
            self.logger.info("Writing resource dump to %s..." % (outfile))
            d = Dump(format=self.dump_format)
            d.write(resource_list=rl, dumpfile=outfile)
        elif outfile is None:
            try:
                # Parenthesised single argument works on Python 2 and 3.
                print(rl.as_xml())
            except ListBaseIndexError as e:
                raise ClientFatalError(
                    "%s. Use --output option to specify base name for output files."
                    % str(e))
        else:
            rl.write(basename=outfile)
예제 #4
0
파일: client.py 프로젝트: pedak/resdbp
 def write_dump_if_requested(self, inventory, dump):
     """Write inventory to the dump file named by dump, if one was given.

     Returns immediately when dump is None. When self.verbose is set the
     destination is announced on stdout before writing. The dump is
     written through Dump using self.dump_format.
     """
     if dump is None:
         return
     if self.verbose:
         # Parenthesised single argument works on Python 2 and 3.
         print("Writing dump to %s..." % (dump))
     d = Dump(format=self.dump_format)
     d.write(inventory=inventory, dumpfile=dump)
예제 #5
0
 def test11_bad_size(self):
     """check_files must raise DumpError for a wrong length unless length checking is off."""
     resources = ResourceList()
     resources.add(Resource('http://ex.org/a',
                            length=9999,
                            path='tests/testdata/a'))
     dump = Dump(resources)
     # With the length check disabled the declared 9999 is accepted
     self.assertTrue(dump.check_files(check_length=False))
     # Default call checks the length and is expected to fail
     with self.assertRaises(DumpError):
         dump.check_files()
예제 #6
0
 def test02_dump_check_files(self):
     """check_files() should succeed on a ChangeList and give total_size 28 (7 + 21)."""
     changes = ChangeList()
     for uri, length, path in (
             ('http://ex.org/a', 7, 'resync/test/testdata/a'),
             ('http://ex.org/b', 21, 'resync/test/testdata/b')):
         changes.add(Resource(uri, length=length, path=path, change="updated"))
     dump = Dump(resources=changes)
     self.assertTrue(dump.check_files())
     self.assertEqual(dump.total_size, 28)
예제 #7
0
 def test00_dump_creation(self):
     """Run Dump.check_files over a two-entry ResourceList and check total_size.

     The entries declare sizes 1 and 2 while the expected total is 28 --
     presumably the on-disk sizes are what gets summed; confirm against Dump.
     """
     resources = ResourceList()
     for uri, size, path in (
             ('http://ex.org/a', 1, 'resync/test/testdata/a'),
             ('http://ex.org/b', 2, 'resync/test/testdata/b')):
         resources.add(Resource(uri, size=size, path=path))
     dump = Dump()
     dump.check_files(resourcelist=resources)
     self.assertEqual(dump.total_size, 28)
예제 #8
0
파일: client.py 프로젝트: EHRI/resync
    def write_resource_list(self,paths=None,outfile=None,links=None,dump=None):
        """Write a Resource List or a Resource Dump for files on local disk.

        The resource list is built with self.build_resource_list(), passing
        paths through and using dump as the set_path flag. If links is not
        None it is stored on the list as its ln attribute.

        If dump is true then a Resource Dump is written instead of a Resource
        List; when outfile is not set, self.default_resource_dump is used as
        the destination. Otherwise the Resource List is printed to stdout as
        XML, or written with basename outfile when outfile is given.

        Raises ClientFatalError if printing to stdout fails with a
        ListBaseIndexError.
        """
        rl = self.build_resource_list(paths=paths,set_path=dump)
        if links is not None:
            rl.ln = links
        if dump:
            if outfile is None:
                outfile = self.default_resource_dump
            # Report the destination file; dump itself is only a flag.
            self.logger.info("Writing resource dump to %s..." % (outfile))
            d = Dump(format=self.dump_format)
            d.write(resource_list=rl,dumpfile=outfile)
        elif outfile is None:
            try:
                # Parenthesised single argument works on Python 2 and 3.
                print(rl.as_xml())
            except ListBaseIndexError as e:
                raise ClientFatalError("%s. Use --output option to specify base name for output files." % str(e))
        else:
            rl.write(basename=outfile)
예제 #9
0
 def test00_dump_creation(self):
     """Run Dump.check_files over a two-entry ResourceList and check total_size.

     Declared lengths are 1 and 2 while the expected total is 28 --
     presumably the on-disk sizes are what gets summed; confirm against Dump.
     """
     resource_list = ResourceList()
     for uri, length, path in (
             ("http://ex.org/a", 1, "resync/test/testdata/a"),
             ("http://ex.org/b", 2, "resync/test/testdata/b")):
         resource_list.add(Resource(uri, length=length, path=path))
     dump = Dump()
     dump.check_files(resource_list=resource_list)
     self.assertEqual(dump.total_size, 28)
예제 #10
0
 def test04_dump_multi_file_max_size(self):
     """Write twelve resources with max_size=2000 and inspect the two zips.

     The first zip is expected to hold manifest.xml plus a-f, the second
     manifest.xml plus g-l. Both zips are removed after inspection.
     """
     rl = ResourceList()
     for letter in 'abcdefghijkl':
         rl.add(Resource('http://ex.org/%s' % (letter),
                         path='resync/test/testdata/a_to_z/%s' % (letter)))
     self.assertEqual(len(rl), 12)
     dumper = Dump(rl)
     dumper.max_size = 2000
     tmpbase = os.path.join(self.tmpdir, 'test0f_')
     first_zip = tmpbase + '00000.zip'
     second_zip = tmpbase + '00001.zip'
     written = dumper.write(tmpbase)
     self.assertEqual(written, 2, 'expect to write 2 dump files')
     for zip_path in (first_zip, second_zip):
         self.assertTrue(os.path.isfile(zip_path))
     # First zip: check member order and the size recorded for each member
     archive = zipfile.ZipFile(first_zip, 'r')
     self.assertEqual(archive.namelist(), ['manifest.xml'] + list('abcdef'))
     expected_sizes = (('a', 9), ('b', 1116), ('c', 32),
                       ('d', 13), ('e', 20), ('f', 1625))
     for member, size in expected_sizes:
         self.assertEqual(archive.getinfo(member).file_size, size)
     archive.close()
     os.unlink(first_zip)
     # Second zip: only the member list is checked
     archive = zipfile.ZipFile(second_zip, 'r')
     self.assertEqual(archive.namelist(), ['manifest.xml'] + list('ghijkl'))
     archive.close()
     os.unlink(second_zip)
예제 #11
0
 def write_dump_if_requested(self, inventory, dump):
     """Write inventory to the dump file named by dump, if one was given.

     Returns immediately when dump is None. When self.verbose is set the
     destination is announced on stdout before writing. The dump is
     written through Dump using self.dump_format.
     """
     if dump is None:
         return
     if self.verbose:
         # Parenthesised single argument works on Python 2 and 3.
         print("Writing dump to %s..." % (dump))
     d = Dump(format=self.dump_format)
     d.write(inventory=inventory, dumpfile=dump)
예제 #12
0
 def test02_dump_check_files(self):
     """check_files() should succeed on a ChangeList and give total_size 28 (7 + 21)."""
     changes = ChangeList()
     for uri, length, path in (
             ('http://ex.org/a', 7, 'tests/testdata/a'),
             ('http://ex.org/b', 21, 'tests/testdata/b')):
         changes.add(Resource(uri, length=length, path=path, change="updated"))
     dump = Dump(resources=changes)
     self.assertTrue(dump.check_files())
     self.assertEqual(dump.total_size, 28)
예제 #13
0
 def test00_dump_creation(self):
     """Run Dump.check_files over a two-entry Inventory and check total_size.

     The entries declare sizes 1 and 2, yet the expected total is 28 --
     presumably check_files measures the files on disk; confirm against Dump.
     """
     inventory = Inventory()
     entries = (('http://ex.org/a', 1, 'resync/test/testdata/a'),
                ('http://ex.org/b', 2, 'resync/test/testdata/b'))
     for uri, size, local in entries:
         inventory.add(ResourceFile(uri, size=size, file=local))
     dump = Dump()
     dump.check_files(inventory=inventory)
     self.assertEqual(dump.total_size, 28)
예제 #14
0
 def test01_dump_zip_change_list(self):
     """write_zip with positional args should produce a valid zip with three members."""
     manifest = ChangeDumpManifest()
     for uri, length, path in (
             ('http://ex.org/a', 7, 'resync/test/testdata/a'),
             ('http://ex.org/b', 21, 'resync/test/testdata/b')):
         manifest.add(Resource(uri, length=length, path=path, change="updated"))
     dump = Dump()
     zip_path = os.path.join(self.tmpdir, "test01_dump.zip")
     dump.write_zip(manifest, zip_path)  # positional args
     self.assertTrue(os.path.exists(zip_path))
     self.assertTrue(zipfile.is_zipfile(zip_path))
     archive = zipfile.ZipFile(zip_path, 'r')
     # Two resources were added; three members are expected in the zip
     self.assertEqual(len(archive.namelist()), 3)
     archive.close()
     os.unlink(zip_path)
예제 #15
0
 def test00_dump_zip_resource_list(self):
     """write_zip with keyword args should produce a valid zip with three members."""
     manifest = ResourceDumpManifest()
     for uri, length, path in (
             ('http://ex.org/a', 7, 'resync/test/testdata/a'),
             ('http://ex.org/b', 21, 'resync/test/testdata/b')):
         manifest.add(Resource(uri, length=length, path=path))
     dump = Dump()
     zip_path = os.path.join(self.tmpdir, "test00_dump.zip")
     dump.write_zip(resources=manifest, dumpfile=zip_path)  # named args
     self.assertTrue(os.path.exists(zip_path))
     self.assertTrue(zipfile.is_zipfile(zip_path))
     archive = zipfile.ZipFile(zip_path, 'r')
     # Two resources were added; three members are expected in the zip
     self.assertEqual(len(archive.namelist()), 3)
     archive.close()
     os.unlink(zip_path)
예제 #16
0
 def test00_dump_zip_resource_list(self):
     """write_zip with keyword args should produce a valid zip with three members."""
     manifest = ResourceDumpManifest()
     for uri, length, path in (
             ('http://ex.org/a', 7, 'tests/testdata/a'),
             ('http://ex.org/b', 21, 'tests/testdata/b')):
         manifest.add(Resource(uri, length=length, path=path))
     dump = Dump()
     zip_path = os.path.join(self.tmpdir, "test00_dump.zip")
     dump.write_zip(resources=manifest, dumpfile=zip_path)  # named args
     self.assertTrue(os.path.exists(zip_path))
     self.assertTrue(zipfile.is_zipfile(zip_path))
     archive = zipfile.ZipFile(zip_path, 'r')
     # Two resources were added; three members are expected in the zip
     self.assertEqual(len(archive.namelist()), 3)
     archive.close()
     os.unlink(zip_path)
예제 #17
0
 def test01_dump_zip_change_list(self):
     """write_zip with positional args should produce a valid zip with three members."""
     manifest = ChangeDumpManifest()
     for uri, length, path in (
             ('http://ex.org/a', 7, 'tests/testdata/a'),
             ('http://ex.org/b', 21, 'tests/testdata/b')):
         manifest.add(Resource(uri, length=length, path=path, change="updated"))
     dump = Dump()
     zip_path = os.path.join(self.tmpdir, "test01_dump.zip")
     dump.write_zip(manifest, zip_path)  # positional args
     self.assertTrue(os.path.exists(zip_path))
     self.assertTrue(zipfile.is_zipfile(zip_path))
     archive = zipfile.ZipFile(zip_path, 'r')
     # Two resources were added; three members are expected in the zip
     self.assertEqual(len(archive.namelist()), 3)
     archive.close()
     os.unlink(zip_path)
예제 #18
0
 def test03_dump_multi_file_max_size(self):
     """Write twelve resources with max_files=4 and inspect the three zips.

     Each zip is expected to hold manifest.xml followed by four resources:
     a-d, e-h and i-l. Member sizes are checked for the first zip only.
     A max_size variant of this scenario was previously disabled here;
     only the file-count limit is exercised.
     """
     rl = ResourceList()
     for letter in 'abcdefghijkl':
         rl.add(Resource('http://ex.org/%s' % (letter),
                         path='tests/testdata/a_to_z/%s' % (letter)))
     self.assertEqual(len(rl), 12)
     dumper = Dump(rl)
     dumper.max_files = 4
     tmpbase = os.path.join(self.tmpdir, 'test03_')
     zip_paths = [tmpbase + suffix
                  for suffix in ('00000.zip', '00001.zip', '00002.zip')]
     written = dumper.write(tmpbase)
     self.assertEqual(written, 3, 'expect to write 3 dump files')
     for zip_path in zip_paths:
         self.assertTrue(os.path.isfile(zip_path))
     # First zip: check member order and the size recorded for each member
     archive = zipfile.ZipFile(zip_paths[0], 'r')
     self.assertEqual(archive.namelist(), ['manifest.xml'] + list('abcd'))
     for member, size in (('a', 9), ('b', 1116), ('c', 32), ('d', 13)):
         self.assertEqual(archive.getinfo(member).file_size, size)
     archive.close()
     os.unlink(zip_paths[0])
     # Second and third zips: only the member lists are checked
     for zip_path, letters in ((zip_paths[1], 'efgh'), (zip_paths[2], 'ijkl')):
         archive = zipfile.ZipFile(zip_path, 'r')
         self.assertEqual(archive.namelist(), ['manifest.xml'] + list(letters))
         archive.close()
         os.unlink(zip_path)
예제 #19
0
 def test03_dump_multi_file_max_size(self):
     """Write twelve resources with max_files=4 and inspect the three zips.

     Each zip is expected to hold manifest.xml followed by four resources:
     a-d, e-h and i-l. Member sizes are checked for the first zip only.
     A max_size variant of this scenario was previously disabled here;
     only the file-count limit is exercised.
     """
     rl = ResourceList()
     for letter in 'abcdefghijkl':
         rl.add(Resource('http://ex.org/%s' % (letter),
                         path='resync/test/testdata/a_to_z/%s' % (letter)))
     self.assertEqual(len(rl), 12)
     dumper = Dump(rl)
     dumper.max_files = 4
     tmpbase = os.path.join(self.tmpdir, 'test03_')
     zip_paths = [tmpbase + suffix
                  for suffix in ('00000.zip', '00001.zip', '00002.zip')]
     written = dumper.write(tmpbase)
     self.assertEqual(written, 3, 'expect to write 3 dump files')
     for zip_path in zip_paths:
         self.assertTrue(os.path.isfile(zip_path))
     # First zip: check member order and the size recorded for each member
     archive = zipfile.ZipFile(zip_paths[0], 'r')
     self.assertEqual(archive.namelist(), ['manifest.xml'] + list('abcd'))
     for member, size in (('a', 9), ('b', 1116), ('c', 32), ('d', 13)):
         self.assertEqual(archive.getinfo(member).file_size, size)
     archive.close()
     os.unlink(zip_paths[0])
     # Second and third zips: only the member lists are checked
     for zip_path, letters in ((zip_paths[1], 'efgh'), (zip_paths[2], 'ijkl')):
         archive = zipfile.ZipFile(zip_path, 'r')
         self.assertEqual(archive.namelist(), ['manifest.xml'] + list(letters))
         archive.close()
         os.unlink(zip_path)
예제 #20
0
 def test10_no_path(self):
     """check_files must raise DumpError when a resource has no local path."""
     resources = ResourceList()
     resources.add(Resource('http://ex.org/a', length=7,
                            path='resync/test/testdata/a'))
     # Second resource deliberately omits path
     resources.add(Resource('http://ex.org/b', length=21))
     dump = Dump(resources)
     with self.assertRaises(DumpError):
         dump.check_files()
예제 #21
0
        def generator() -> [SitemapData, Resource]:
            """Yield one Resource per resource-dump zip written to the metadata path.

            Resources come from self.resource_generator() applied to
            resource_metadata (taken from the enclosing scope) and are
            collected into a ResourceDump. Each time the running count is a
            multiple of self.param.max_items_in_list, the batch is zipped to
            rd_<ordinal>.zip and a Resource whose uri is that path is yielded.
            Any leftover resources are zipped and yielded as a final batch.

            Every zip is written before its Resource is yielded, so the file
            already exists when the consumer receives the Resource, even if
            the consumer stops iterating right after the last item.
            """
            resourcedump = None
            ordinal = self.find_ordinal(Capability.resourcedump.name)
            resource_count = 0
            doc_start = None
            resource_generator = self.resource_generator()
            for resource_count, resource in resource_generator(resource_metadata):
                # Start a new batch lazily, stamping its start time
                if resourcedump is None:
                    resourcedump = ResourceDump()
                    doc_start = defaults.w3c_now()
                    resourcedump.md_at = doc_start

                resourcedump.add(resource)

                # Batch is full: zip it, then hand out its Resource
                if resource_count % self.param.max_items_in_list == 0:
                    ordinal += 1
                    doc_end = defaults.w3c_now()
                    resourcedump.md_completed = doc_end
                    d = Dump(resources=resourcedump)
                    zipf = self.param.abs_metadata_path("rd_" + str(ordinal) + ".zip")

                    print(str(zipf))
                    d.write_zip(resources=resourcedump, dumpfile=zipf)
                    dumpResource = Resource(uri=str(zipf))
                    yield dumpResource
                    resourcedump = None

            # Leftover partial batch: same sequence as above, write then yield
            if resourcedump:
                ordinal += 1
                doc_end = defaults.w3c_now()
                resourcedump.md_completed = doc_end
                d = Dump()
                zipf = self.param.abs_metadata_path("rd_" + str(ordinal) + ".zip")
                print(str(zipf))
                d.write_zip(resources=resourcedump, dumpfile=zipf)
                dumpResource = Resource(uri=str(zipf))
                yield dumpResource
예제 #22
0
 def test11_bad_size(self):
     """check_files must raise DumpError for a wrong length unless length checking is off."""
     resources = ResourceList()
     resources.add(Resource('http://ex.org/a',
                            length=9999,
                            path='resync/test/testdata/a'))
     dump = Dump(resources)
     # With the length check disabled the declared 9999 is accepted
     self.assertTrue(dump.check_files(check_length=False))
     # Default call checks the length and is expected to fail
     with self.assertRaises(DumpError):
         dump.check_files()
예제 #23
0
 def write_dump_if_requested(self, inventory, dump):
     """Write inventory to the dump file named by dump, if one was given.

     Does nothing when dump is None; otherwise logs the destination at
     info level and writes through Dump using self.dump_format.
     """
     if dump is not None:
         self.logger.info("Writing dump to %s..." % (dump))
         writer = Dump(format=self.dump_format)
         writer.write(inventory=inventory, dumpfile=dump)
예제 #24
0
def __write_dump__():
    """Build the "redump" resource list and write it as a dump with basename rs/source/redump/rd_."""
    resource_list = __create_resourcelist__("redump")
    Dump(resources=resource_list).write(basename="rs/source/redump/rd_")
예제 #25
0
 def write_dump_if_requested(self,resourcelist,dump):
     """Write resourcelist to the dump file named by dump, if one was given.

     Does nothing when dump is None; otherwise logs the destination at
     info level and writes through Dump using self.dump_format.
     """
     if dump is not None:
         self.logger.info("Writing dump to %s..." % (dump))
         writer = Dump(format=self.dump_format)
         writer.write(resourcelist=resourcelist, dumpfile=dump)
예제 #26
0
    def create_zip(self,
                   resourcelist,
                   prefix,
                   write_list=False,
                   write_manifest=True):
        """
        Dump local resources in resourcelist to a zip file with the specified prefix. The index in the zip file name
        will be 1 higher than the highest zip file index found with the same prefix. A manifest.xml will be included
        in the zip.
        --  The resync.Dump.write_zip method used in this method has the side effect of changing local paths in
            resourcelist into paths relative in zip.
        :param resourcelist: resources to zip
        :param prefix: prefix of the zip file
        :param write_list: True if resourcelist should be written to local disc. Default: False
        :param write_manifest: True if a separate manifest file should be written to disc, False otherwise. Default: True
        :return: the created zip as a resync.Resource.
        """

        md_at = None  # w3cdt.datetime_to_str(no_fractions=True) # attribute gets lost in read > write cycle with resync library.

        # Find the highest index already in use. The prefix is stripped before
        # looking for digits so that digits inside the prefix are not mistaken
        # for the index, and the maximum is taken numerically instead of
        # trusting lexicographic file order. Names without digits are skipped.
        index = -1
        prefix_len = len(os.path.basename(prefix))
        for zip_file in glob(os.path.join(self.publish_dir, prefix + "*.zip")):
            found = re.search(r"\d+", os.path.basename(zip_file)[prefix_len:])
            if found:
                index = max(index, int(found.group()))

        zip_name = "%s%05d" % (prefix, index + 1)
        if write_list:
            # this is the given resourcelist with local paths. As such it is *not* the resourcedump_manifest.
            with open(os.path.join(self.publish_dir, zip_name + ".xml"),
                      "w") as rl_file:
                rl_file.write(resourcelist.as_xml())

        zip_path = os.path.join(self.publish_dir, zip_name + ".zip")
        dump = Dump()
        dump.path_prefix = self.resource_dir
        dump.write_zip(resourcelist,
                       zip_path)  # paths in resourcelist will be stripped.
        md_completed = None  # w3cdt.datetime_to_str(no_fractions=True) # attribute gets lost in read > write cycle with resync library.

        loc = self.publish_url + zip_name + ".zip"  # mandatory
        lastmod = self.last_modified(resourcelist)  # optional
        md_type = "application/zip"  # recommended
        md_length = os.stat(zip_path).st_size
        md5 = compute_md5_for_file(zip_path)

        zip_resource = Resource(uri=loc,
                                lastmod=lastmod,
                                length=md_length,
                                md5=md5,
                                mime_type=md_type,
                                md_at=md_at,
                                md_completed=md_completed)
        if write_manifest:
            # Separate manifest on disc, linked from the zip resource as rel="content"
            rdm = ResourceDumpManifest(resources=resourcelist.resources)
            rdm_url = self.publish_url + PREFIX_MANIFEST + zip_name + ".xml"
            with open(
                    os.path.join(self.publish_dir,
                                 PREFIX_MANIFEST + zip_name + ".xml"),
                    "w") as rdm_file:
                rdm_file.write(rdm.as_xml())
            zip_resource.link_set(rel="content", href=rdm_url)

        return zip_resource
예제 #27
0
        def generator(changedump=None) -> [SitemapData, ChangeDump]:
            """Yield (sitemap_data, ChangeDump) pairs, one per change-dump zip written.

            The current resources (from self.resource_generator() applied to
            resource_metadata, taken from the enclosing scope) are compared by
            uri and md5 with self.previous_resources to find created, updated
            and deleted resources. Observers are informed of the counts. The
            changes are then batched into ChangeDump objects; each batch is
            zipped to cd_<ordinal>.zip under the metadata path and yielded
            together with the result of self.finish_sitemap().

            Every zip is written before its pair is yielded, so the file
            already exists when the consumer receives it.

            :param changedump: an existing ChangeDump to keep filling, or None
                to start a fresh one.
            """
            resource_generator = self.resource_generator()
            self.update_previous_state()
            prev_r = self.previous_resources
            curr_r = {
                resource.uri: resource
                for count, resource in resource_generator(resource_metadata)
            }
            created = [r for r in curr_r.values() if r.uri not in prev_r]
            updated = [
                r for r in curr_r.values()
                if r.uri in prev_r and r.md5 != prev_r[r.uri].md5
            ]
            deleted = [r for r in prev_r.values() if r.uri not in curr_r]
            unchang = [
                r for r in curr_r.values()
                if r.uri in prev_r and r.md5 == prev_r[r.uri].md5
            ]

            # remove lastmod from deleted resource metadata
            for resource in deleted:
                resource.lastmod = None

            num_created = len(created)
            num_updated = len(updated)
            num_deleted = len(deleted)
            tot_changes = num_created + num_updated + num_deleted
            self.observers_inform(self,
                                  ExecutorEvent.found_changes,
                                  created=num_created,
                                  updated=num_updated,
                                  deleted=num_deleted,
                                  unchanged=len(unchang))
            all_changes = {
                "created": created,
                "updated": updated,
                "deleted": deleted
            }

            ordinal = self.find_ordinal(Capability.changedump.name)

            resource_count = 0
            if changedump:
                # Continue the supplied dump; step the ordinal back by one,
                # unless the dump is already full, in which case start fresh.
                ordinal -= 1
                resource_count = len(changedump)
                if resource_count >= self.param.max_items_in_list:
                    changedump = None
                    ordinal += 1
                    resource_count = 0

            for change_type, resources in all_changes.items():
                for resource in resources:
                    if changedump is None:
                        changedump = ChangeDump()
                        changedump.md_from = self.date_changedump_from

                    # type of change: created, updated or deleted
                    resource.change = change_type
                    resource.md_datetime = self.date_start_processing
                    changedump.add(resource)

                    resource_count += 1

                    # Batch is full: zip it, finish its sitemap, then yield
                    # NOTE(review): md_completed is not set on this path,
                    # unlike the final batch below -- confirm this is intended.
                    if resource_count % self.param.max_items_in_list == 0:
                        ordinal += 1
                        d = Dump(resources=changedump)
                        zipf = self.param.abs_metadata_path("cd_" +
                                                            str(ordinal) +
                                                            ".zip")
                        print(str(zipf))
                        d.write_zip(resources=changedump, dumpfile=zipf)
                        doc_end = defaults.w3c_now()

                        sitemap_data = self.finish_sitemap(
                            ordinal,
                            changedump,
                            doc_start=self.date_start_processing,
                            doc_end=doc_end)
                        dumpResource = ChangeDump(uri=str(zipf))
                        yield sitemap_data, dumpResource
                        changedump = None

            # Leftover partial batch: write the zip before yielding, so the
            # file exists even if the consumer stops after this item.
            if changedump and tot_changes > 0:
                ordinal += 1
                doc_end = defaults.w3c_now()
                changedump.md_completed = doc_end
                d = Dump()
                zipf = self.param.abs_metadata_path("cd_" + str(ordinal) +
                                                    ".zip")
                print(str(zipf))
                sitemap_data = self.finish_sitemap(
                    ordinal,
                    changedump,
                    doc_start=self.date_start_processing,
                    doc_end=doc_end)
                d.write_zip(resources=changedump, dumpfile=zipf)
                dumpResource = ChangeDump(uri=str(zipf))
                yield sitemap_data, dumpResource
예제 #28
0
파일: client.py 프로젝트: pedak/sync-oai
 def write_dump_if_requested(self, inventory, dump):
     """Write inventory to the dump file named by dump, if one was given.

     Does nothing when dump is None; otherwise logs the destination at
     info level and writes through Dump using self.dump_format.
     """
     if dump is not None:
         self.logger.info("Writing dump to %s..." % (dump))
         writer = Dump(format=self.dump_format)
         writer.write(inventory=inventory, dumpfile=dump)
예제 #29
0
파일: test_dump.py 프로젝트: EHRI/resydes
def __write_dump__():
    """Build the "redump" resource list and write it as a dump with basename rs/source/redump/rd_."""
    dumper = Dump(resources=__create_resourcelist__("redump"))
    dumper.write(basename="rs/source/redump/rd_")
예제 #30
0
    def create_zip(self, resourcelist, prefix, write_list=False, write_manifest=True):
        """
        Dump local resources in resourcelist to a zip file with the specified prefix. The index in the zip file name
        will be 1 higher than the highest zip file index found with the same prefix. A manifest.xml will be included
        in the zip.
        --  The resync.Dump.write_zip method used in this method has the side effect of changing local paths in
            resourcelist into paths relative in zip.
        :param resourcelist: resources to zip
        :param prefix: prefix of the zip file
        :param write_list: True if resourcelist should be written to local disc. Default: False
        :param write_manifest: True if a separate manifest file should be written to disc, False otherwise. Default: True
        :return: the created zip as a resync.Resource.
        """

        md_at = (
            None
        )  # w3cdt.datetime_to_str(no_fractions=True) # attribute gets lost in read > write cycle with resync library.

        # Find the highest index already in use. The prefix is stripped before
        # looking for digits so that digits inside the prefix are not mistaken
        # for the index, and the maximum is taken numerically instead of
        # trusting lexicographic file order. Names without digits are skipped.
        index = -1
        prefix_len = len(os.path.basename(prefix))
        for zip_file in glob(os.path.join(self.publish_dir, prefix + "*.zip")):
            found = re.search(r"\d+", os.path.basename(zip_file)[prefix_len:])
            if found:
                index = max(index, int(found.group()))

        zip_name = "%s%05d" % (prefix, index + 1)
        if write_list:
            # this is the given resourcelist with local paths. As such it is *not* the resourcedump_manifest.
            with open(os.path.join(self.publish_dir, zip_name + ".xml"), "w") as rl_file:
                rl_file.write(resourcelist.as_xml())

        zip_path = os.path.join(self.publish_dir, zip_name + ".zip")
        dump = Dump()
        dump.path_prefix = self.resource_dir
        dump.write_zip(resourcelist, zip_path)  # paths in resourcelist will be stripped.
        md_completed = (
            None
        )  # w3cdt.datetime_to_str(no_fractions=True) # attribute gets lost in read > write cycle with resync library.

        loc = self.publish_url + zip_name + ".zip"  # mandatory
        lastmod = self.last_modified(resourcelist)  # optional
        md_type = "application/zip"  # recommended
        md_length = os.stat(zip_path).st_size
        md5 = compute_md5_for_file(zip_path)

        zip_resource = Resource(
            uri=loc,
            lastmod=lastmod,
            length=md_length,
            md5=md5,
            mime_type=md_type,
            md_at=md_at,
            md_completed=md_completed,
        )
        if write_manifest:
            # Separate manifest on disc, linked from the zip resource as rel="content"
            rdm = ResourceDumpManifest(resources=resourcelist.resources)
            rdm_url = self.publish_url + PREFIX_MANIFEST + zip_name + ".xml"
            with open(os.path.join(self.publish_dir, PREFIX_MANIFEST + zip_name + ".xml"), "w") as rdm_file:
                rdm_file.write(rdm.as_xml())
            zip_resource.link_set(rel="content", href=rdm_url)

        return zip_resource