コード例 #1
0
ファイル: client.py プロジェクト: JordanReiter/resync
 def changelist_sitemap(self,outfile=None,ref_sitemap=None,newref_sitemap=None,
                        empty=None,capabilities=None,dump=None):
     """Build a ChangeList and write it out as a sitemap.

     Arguments:
     - outfile - basename passed to Sitemap.write(); when None the XML is
                 printed to stdout instead
     - ref_sitemap - reference sitemap read as the "old" state
     - newref_sitemap - optional second sitemap read as the "new" state;
                        when None, self.resourcelist is used instead
     - empty - when true, skip the comparison and emit the ChangeList
               without adding any changed resources
     - capabilities - stored on the ChangeList
     - dump - passed through to self.write_dump_if_requested()
     """
     changelist = ChangeList()
     changelist.capabilities = capabilities
     if not empty:
         # 1. Get and parse reference sitemap
         old_inv = self.read_reference_sitemap(ref_sitemap)
         # 2. Use the second reference sitemap if one was given, otherwise
         # fall back to the client's own resourcelist
         if newref_sitemap is None:
             new_inv = self.resourcelist
         else:
             new_inv = self.read_reference_sitemap(newref_sitemap, name='new reference')
         # 3. Calculate changelist; unchanged resources are not recorded
         (same, updated, deleted, created) = old_inv.compare(new_inv)
         changelist.add_changed_resources(updated, change='updated')
         changelist.add_changed_resources(deleted, change='deleted')
         changelist.add_changed_resources(created, change='created')
     # 4. Write out changelist
     s = Sitemap(pretty_xml=True, allow_multifile=self.allow_multifile, mapper=self.mapper)
     if self.max_sitemap_entries is not None:
         s.max_sitemap_entries = self.max_sitemap_entries
     if outfile is None:
         # Parenthesized single-argument print behaves the same on
         # Python 2 and Python 3.
         print(s.resources_as_xml(changelist, changelist=True))
     else:
         s.write(changelist, basename=outfile, changelist=True)
     self.write_dump_if_requested(changelist, dump)
コード例 #2
0
ファイル: client.py プロジェクト: pedak/sync-oai
 def changeset_sitemap(
     self, outfile=None, ref_sitemap=None, newref_sitemap=None, empty=None, capabilities=None, dump=None
 ):
     """Build a ChangeSet and write it out as a sitemap.

     Arguments:
     - outfile - basename passed to Sitemap.write(); when None the XML is
                 printed to stdout instead
     - ref_sitemap - reference sitemap read as the "old" inventory
     - newref_sitemap - optional second sitemap read as the "new"
                        inventory; when None, self.inventory is used
     - empty - when true, no comparison is made and the ChangeSet is
               written without any changed resources
     - capabilities - stored on the ChangeSet
     - dump - passed through to self.write_dump_if_requested()
     """
     changeset = ChangeSet()
     changeset.capabilities = capabilities
     if not empty:
         # 1. Get and parse reference sitemap
         old_inv = self.read_reference_sitemap(ref_sitemap)
         # 2. Either read the second reference sitemap or use the
         # client's own inventory
         if newref_sitemap is None:
             new_inv = self.inventory
         else:
             new_inv = self.read_reference_sitemap(newref_sitemap, name="new reference")
         # 3. Calculate changeset; the "same" group is deliberately ignored
         (same, updated, deleted, created) = old_inv.compare(new_inv)
         for resources, changetype in (
             (updated, "UPDATED"),
             (deleted, "DELETED"),
             (created, "CREATED"),
         ):
             changeset.add_changed_resources(resources, changetype=changetype)
     # 4. Write out changeset
     s = Sitemap(pretty_xml=True, allow_multifile=self.allow_multifile, mapper=self.mapper)
     if self.max_sitemap_entries is not None:
         s.max_sitemap_entries = self.max_sitemap_entries
     if outfile is None:
         # print(...) with one argument is valid on Python 2 and Python 3
         print(s.resources_as_xml(changeset, changeset=True))
     else:
         s.write(changeset, basename=outfile, changeset=True)
     self.write_dump_if_requested(changeset, dump)
コード例 #3
0
ファイル: executors.py プロジェクト: EHRI/rspub-core
 def save_sitemap(self, sitemap, path):
     """Serialize ``sitemap`` as XML into the file at ``path``.

     The XML is streamed to a file opened in binary mode rather than
     written as a string: per the original note, writing the string form
     ends up with encoding=ASCII on some systems, see
     https://docs.python.org/3.4/library/xml.etree.elementtree.html#write
     """
     sitemap.default_capability()
     with open(path, "wb") as handle:
         writer = Sitemap(pretty_xml=self.para.is_saving_pretty_xml)
         writer.resources_as_xml(
             sitemap, sitemapindex=sitemap.sitemapindex, fh=handle)
コード例 #4
0
ファイル: client.py プロジェクト: pedak/sync-oai
 def write_sitemap(self, outfile=None, capabilities=None, dump=None):
     """Write the client's inventory out as a sitemap.

     Arguments:
     - outfile - basename passed to Sitemap.write(); when None the XML is
                 printed to stdout instead
     - capabilities - stored on the inventory before it is written
     - dump - passed through to self.write_dump_if_requested()
     """
     # self.inventory supplies the resources (per the original comment it
     # sets up base_path->base_uri mappings and reads from disk)
     i = self.inventory
     i.capabilities = capabilities
     s = Sitemap(pretty_xml=True, allow_multifile=self.allow_multifile, mapper=self.mapper)
     if self.max_sitemap_entries is not None:
         s.max_sitemap_entries = self.max_sitemap_entries
     if outfile is None:
         # Parenthesized single-argument print works on Python 2 and 3
         print(s.resources_as_xml(i, capabilities=i.capabilities))
     else:
         s.write(i, basename=outfile)
     self.write_dump_if_requested(i, dump)
コード例 #5
0
 def write_sitemap(self, outfile=None, capabilities=None, dump=None):
     """Emit self.inventory as a sitemap, to a file or to stdout.

     Arguments:
     - outfile - basename given to Sitemap.write(); if None, the XML is
                 printed to stdout
     - capabilities - assigned to the inventory's ``capabilities``
     - dump - forwarded to self.write_dump_if_requested()
     """
     # Inventory comes from self.inventory (original comment: sets up
     # base_path->base_uri mappings, gets inventory from disk)
     i = self.inventory
     i.capabilities = capabilities
     s = Sitemap(pretty_xml=True,
                 allow_multifile=self.allow_multifile,
                 mapper=self.mapper)
     if self.max_sitemap_entries is not None:
         s.max_sitemap_entries = self.max_sitemap_entries
     if outfile is not None:
         s.write(i, basename=outfile)
     else:
         # print(...) with a single argument is portable across
         # Python 2 and Python 3
         print(s.resources_as_xml(i, capabilities=i.capabilities))
     self.write_dump_if_requested(i, dump)
コード例 #6
0
 def test2_pretty_output(self):
     """Check the pretty-printed XML produced for the dir1 test data."""
     builder = InventoryBuilder()
     builder.mapper = Mapper(['http://example.org/t','resync/test/testdata/dir1'])
     inventory = builder.from_disk()
     sitemap = Sitemap()
     sitemap.pretty_xml=True
     actual = sitemap.resources_as_xml(inventory)
     # Adjacent literals concatenate into the single expected document
     expected = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">\n'
         '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-07-25T17:13:46Z</lastmod><rs:size>20</rs:size></url>\n'
         '<url><loc>http://example.org/t/file_b</loc><lastmod>2001-09-09T01:46:40Z</lastmod><rs:size>45</rs:size></url>\n'
         '</urlset>'
     )
     self.assertEqual(actual, expected)
コード例 #7
0
 def test3_with_md5(self):
     """Check that md5 fixity elements appear in the XML when do_md5 is set."""
     ib = InventoryBuilder(do_md5=True)
     ib.mapper = Mapper(['http://example.org/t','resync/test/testdata/dir1'])
     i = ib.from_disk()
     s = Sitemap()
     xml = s.resources_as_xml(i)
     # Raw strings keep the regex escapes (\w, \:, \-, \+) intact without
     # relying on Python passing unknown string escapes through unchanged.
     patterns = (
         # must escape + in md5
         r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>20</rs:size><rs:fixity type="md5">a/Jv1mYBtSjS4LR\+qoft/Q==</rs:fixity>',
         r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>45</rs:size><rs:fixity type="md5">RS5Uva4WJqxdbnvoGzneIQ==</rs:fixity>',
     )
     for pattern in patterns:
         self.assertNotEqual( None, re.search(pattern, xml) )
コード例 #8
0
ファイル: client.py プロジェクト: semantalytics/simulator
    def changeset_sitemap(self,outfile=None,ref_sitemap=None,capabilities=None,
                          dump=None):
        """Compare a reference sitemap with self.inventory and write a changeset.

        Arguments:
        - outfile - basename passed to Sitemap.write(); when None the XML
                    is printed to stdout instead
        - ref_sitemap - reference sitemap(s) to read as the "old" state
        - capabilities - stored on the resulting ChangeSet
        - dump - passed through to self.write_dump_if_requested()
        """
        # 1. Get and parse reference sitemap
        rs = Sitemap(verbose=self.verbose, allow_multifile=self.allow_multifile,
                     mapper=self.mapper)
        if self.verbose:
            print("Reading sitemap(s) from %s ..." % (ref_sitemap))
        ri = rs.read(ref_sitemap)
        num_entries = len(ri)
        print("Read reference sitemap with %d entries in %d sitemaps" % (num_entries,rs.sitemaps_created))
        if self.verbose:
            # Show at most 100 entries unless max_sitemap_entries is set
            to_show = 100
            override_str = ' (override with --max-sitemap-entries)'
            if self.max_sitemap_entries:
                to_show = self.max_sitemap_entries
                override_str = ''
            if num_entries > to_show:
                print("Showing first %d entries sorted by URI%s..." % (to_show,override_str))
            n = 0
            # Iterate the inventory that was just read (ri); the previous
            # code looped over an undefined name and raised NameError.
            for r in ri:
                print(r)
                n += 1
                if n >= to_show:
                    break
        # 2. Get the inventory to compare against (per the original comment
        # this sets up base_path->base_uri mappings and reads from disk)
        disk_inventory = self.inventory
        # 3. Calculate changeset; unchanged resources are not recorded
        (same,updated,deleted,created) = ri.compare(disk_inventory)
        changeset = ChangeSet()
        changeset.capabilities = capabilities
        changeset.add_changed_resources( updated, changetype='updated' )
        changeset.add_changed_resources( deleted, changetype='deleted' )
        changeset.add_changed_resources( created, changetype='created' )
        # 4. Write out changeset
        s = Sitemap(verbose=self.verbose, pretty_xml=True, allow_multifile=self.allow_multifile,
                    mapper=self.mapper)
        if self.max_sitemap_entries is not None:
            s.max_sitemap_entries = self.max_sitemap_entries
        if outfile is None:
            print(s.resources_as_xml(changeset))
        else:
            s.write(changeset,basename=outfile)
        self.write_dump_if_requested(changeset,dump)
コード例 #9
0
 def test2_pretty_output(self):
     """Compare pretty XML output for the dir1 test data with a fixed document."""
     inv_builder = InventoryBuilder()
     inv_builder.mapper = Mapper(
         ['http://example.org/t', 'resync/test/testdata/dir1'])
     inv = inv_builder.from_disk()
     writer = Sitemap()
     writer.pretty_xml = True
     produced = writer.resources_as_xml(inv)
     # The expected document, one output line per list element
     wanted = '\n'.join([
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>',
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">',
         '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-07-25T17:13:46Z</lastmod><rs:size>20</rs:size></url>',
         '<url><loc>http://example.org/t/file_b</loc><lastmod>2001-09-09T01:46:40Z</lastmod><rs:size>45</rs:size></url>',
         '</urlset>',
     ])
     self.assertEqual(produced, wanted)
コード例 #10
0
 def changeset_sitemap(self,
                       outfile=None,
                       ref_sitemap=None,
                       newref_sitemap=None,
                       empty=None,
                       capabilities=None,
                       dump=None):
     """Create a ChangeSet and write it as a sitemap to a file or stdout.

     Arguments:
     - outfile - basename given to Sitemap.write(); if None, the XML is
                 printed to stdout
     - ref_sitemap - reference sitemap read as the "old" inventory
     - newref_sitemap - if given, read as the "new" inventory; otherwise
                        self.inventory is used
     - empty - if true, the ChangeSet is written with no changed
               resources and no sitemaps are compared
     - capabilities - assigned to the ChangeSet's ``capabilities``
     - dump - forwarded to self.write_dump_if_requested()
     """
     changeset = ChangeSet()
     changeset.capabilities = capabilities
     if not empty:
         # 1. Get and parse reference sitemap
         old_inv = self.read_reference_sitemap(ref_sitemap)
         # 2. Pick the "new" side: second reference sitemap if specified,
         # else the client's own inventory
         if newref_sitemap is not None:
             new_inv = self.read_reference_sitemap(newref_sitemap,
                                                   name='new reference')
         else:
             new_inv = self.inventory
         # 3. Calculate changeset; resources reported as "same" are dropped
         (same, updated, deleted, created) = old_inv.compare(new_inv)
         changeset.add_changed_resources(updated, changetype='UPDATED')
         changeset.add_changed_resources(deleted, changetype='DELETED')
         changeset.add_changed_resources(created, changetype='CREATED')
     # 4. Write out changeset
     s = Sitemap(pretty_xml=True,
                 allow_multifile=self.allow_multifile,
                 mapper=self.mapper)
     if self.max_sitemap_entries is not None:
         s.max_sitemap_entries = self.max_sitemap_entries
     if outfile is None:
         # Single-argument print(...) runs unchanged on Python 2 and 3
         print(s.resources_as_xml(changeset, changeset=True))
     else:
         s.write(changeset, basename=outfile, changeset=True)
     self.write_dump_if_requested(changeset, dump)
コード例 #11
0
    def convert_to_xml(self, resources, sitemap_index=False, fh=None):
        """Produce sitemap-format XML for a collection of resources.

        The collection is also stored on ``self.res_container``.

        Arguments:
        - resources - collection of Resource objects; len() is called on
                      it, so it must be sized (a bare iterator will not
                      work here)
        - sitemap_index - forwarded to Sitemap.resources_as_xml() as
                          ``sitemapindex``; set True to write a
                          sitemapindex instead of a sitemap
        - fh - forwarded to Sitemap.resources_as_xml() as ``fh``; per the
               original documentation the XML is written to this
               filehandle instead of being returned as a string

        Returns None when the collection is empty, otherwise whatever
        Sitemap.resources_as_xml() returns.
        """
        writer = Sitemap()
        self.res_container = resources
        if not len(self.res_container):
            # Nothing to serialize
            return None
        xml_kwargs = {'sitemapindex': sitemap_index, 'fh': fh}
        return writer.resources_as_xml(self.res_container, **xml_kwargs)
コード例 #12
0
 def test3_with_md5(self):
     """Verify both test files get an md5 fixity element when do_md5=True."""
     ib = InventoryBuilder(do_md5=True)
     ib.mapper = Mapper(
         ['http://example.org/t', 'resync/test/testdata/dir1'])
     i = ib.from_disk()
     s = Sitemap()
     xml = s.resources_as_xml(i)
     # Patterns are raw strings so the regex escapes reach ``re`` verbatim
     # instead of depending on unknown string escapes being preserved.
     file_a_pattern = r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>20</rs:size><rs:fixity type="md5">a/Jv1mYBtSjS4LR\+qoft/Q==</rs:fixity>'  # must escape + in md5
     file_b_pattern = r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>45</rs:size><rs:fixity type="md5">RS5Uva4WJqxdbnvoGzneIQ==</rs:fixity>'
     self.assertNotEqual(None, re.search(file_a_pattern, xml))
     self.assertNotEqual(None, re.search(file_b_pattern, xml))