Exemple #1
0
    def write_sitemap(self, outfile=None, capabilities=None, dump=None):
        """Serialize the current inventory as a sitemap.

        The inventory is taken from ``self.inventory``, tagged with the
        supplied capabilities and handed to a ``Sitemap`` writer that is
        configured from this object's settings.

        Arguments:
          outfile      -- basename passed to ``Sitemap.write``; when None
                          the XML is printed to stdout instead
          capabilities -- value stored on the inventory's ``capabilities``
                          attribute before it is written
          dump         -- forwarded, together with the inventory, to
                          ``self.write_dump_if_requested``
        """
        # Set up base_path->base_uri mappings, get inventory from disk
        # (both are expected to happen behind the ``inventory`` attribute)
        inventory = self.inventory
        inventory.capabilities = capabilities
        sitemap = Sitemap(verbose=self.verbose, pretty_xml=True,
                          allow_multifile=self.allow_multifile,
                          mapper=self.mapper)
        # Leave the writer's own limit alone unless one was configured here
        if self.max_sitemap_entries is not None:
            sitemap.max_sitemap_entries = self.max_sitemap_entries
        if outfile is None:
            # Parenthesized single-argument print runs on Python 2 and 3
            print(sitemap.inventory_as_xml(inventory))
        else:
            sitemap.write(inventory, basename=outfile)
        self.write_dump_if_requested(inventory, dump)
Exemple #2
0
 def changeset_sitemap(self,
                       outfile=None,
                       ref_sitemap=None,
                       capabilities=None,
                       dump=None):
     """Write a sitemap describing how the disk differs from a reference.

     The reference sitemap is read, compared with ``self.inventory``, and
     the updated, deleted and created resources are collected into a new
     ``Inventory`` which is then written out as a sitemap.

     Arguments:
       outfile      -- basename passed to ``Sitemap.write``; when None the
                       changeset XML is printed to stdout instead
       ref_sitemap  -- passed to ``Sitemap.read`` to load the reference
       capabilities -- value stored on the changeset's ``capabilities``
       dump         -- forwarded, together with the changeset, to
                       ``self.write_dump_if_requested``

     A one-line summary of the reference sitemap is always printed; the
     per-resource listing is only printed when ``self.verbose`` is set.
     """
     # 1. Get and parse reference sitemap
     ref_reader = Sitemap(verbose=self.verbose,
                          allow_multifile=self.allow_multifile,
                          mapper=self.mapper)
     if self.verbose:
         # One-element tuple so a tuple-valued ref_sitemap formats safely
         print("Reading sitemap(s) from %s ..." % (ref_sitemap,))
     ref_inventory = ref_reader.read(ref_sitemap)
     num_entries = len(ref_inventory)
     print("Read reference sitemap with %d entries in %d sitemaps" % (
         num_entries, ref_reader.sitemaps_created))
     if self.verbose:
         # Show at most 100 entries unless a sitemap entry limit is set,
         # in which case that limit doubles as the display limit
         to_show = 100
         override_str = ' (override with --max-sitemap-entries)'
         if self.max_sitemap_entries:
             to_show = self.max_sitemap_entries
             override_str = ''
         if num_entries > to_show:
             print("Showing first %d entries sorted by URI%s..." % (
                 to_show, override_str))
         # Count from 1 so the check below stops after to_show entries
         for shown, uri in enumerate(ref_inventory.resource_uris(), 1):
             print(ref_inventory.resources[uri])
             if shown >= to_show:
                 break
     # 2. Set up base_path->base_uri mappings, get inventory from disk
     disk_inventory = self.inventory
     # 3. Calculate changeset (the count of unchanged resources is unused)
     (_num_same, updated, deleted, created) = ref_inventory.compare(
         disk_inventory)
     changeset = Inventory()
     changeset.capabilities = capabilities
     # Updated and created entries are taken from the disk inventory,
     # deleted entries from the reference inventory
     changeset.add(disk_inventory.changeset(updated, changetype='updated'))
     changeset.add(ref_inventory.changeset(deleted, changetype='deleted'))
     changeset.add(disk_inventory.changeset(created, changetype='created'))
     # 4. Write out changeset
     writer = Sitemap(verbose=self.verbose,
                      pretty_xml=True,
                      allow_multifile=self.allow_multifile,
                      mapper=self.mapper)
     if self.max_sitemap_entries is not None:
         writer.max_sitemap_entries = self.max_sitemap_entries
     if outfile is None:
         # Parenthesized single-argument print runs on Python 2 and 3
         print(writer.inventory_as_xml(changeset))
     else:
         writer.write(changeset, basename=outfile)
     self.write_dump_if_requested(changeset, dump)
Exemple #3
0
 def write_sitemap(self, outfile=None, capabilities=None, dump=None):
     """Write the current inventory out as a sitemap.

     Arguments:
       outfile      -- basename passed to ``Sitemap.write``; when None the
                       XML is printed to stdout instead
       capabilities -- value stored on the inventory's ``capabilities``
                       attribute before it is written
       dump         -- forwarded, together with the inventory, to
                       ``self.write_dump_if_requested``
     """
     # Set up base_path->base_uri mappings, get inventory from disk
     # (both are expected to happen behind the ``inventory`` attribute)
     inventory = self.inventory
     inventory.capabilities = capabilities
     writer = Sitemap(verbose=self.verbose,
                      pretty_xml=True,
                      allow_multifile=self.allow_multifile,
                      mapper=self.mapper)
     # Only override the writer's entry limit when one was configured
     if self.max_sitemap_entries is not None:
         writer.max_sitemap_entries = self.max_sitemap_entries
     if outfile is None:
         # Parenthesized single-argument print runs on Python 2 and 3
         print(writer.inventory_as_xml(inventory))
     else:
         writer.write(inventory, basename=outfile)
     self.write_dump_if_requested(inventory, dump)
 def test2_pretty_output(self):
     # With pretty_xml enabled the sitemap XML for the dir1 test data must
     # match this exact text, one <url> element per line.
     expected_xml = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://resourcesync.org/change/0.1">\n'
         '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-03-14T17:46:04</lastmod><rs:size>20</rs:size></url>\n'
         '<url><loc>http://example.org/t/file_b</loc><lastmod>2012-03-14T17:46:25</lastmod><rs:size>45</rs:size></url>\n'
         '</urlset>'
     )
     # Here the URI/path pair is supplied through a Mapper on the builder
     # instead of as arguments to from_disk()
     builder = InventoryBuilder()
     builder.mapper = Mapper(
         ['http://example.org/t', 'resync/test/testdata/dir1'])
     inventory = builder.from_disk()
     sitemap = Sitemap()
     sitemap.pretty_xml = True
     self.assertEqual(sitemap.inventory_as_xml(inventory), expected_xml)
 def test3_with_md5(self):
     # Build the dir1 inventory with MD5 digests enabled and check that the
     # size and checksum of each file appear in the generated sitemap XML.
     ib = InventoryBuilder(do_md5=True)
     i = ib.from_disk('resync/test/testdata/dir1', 'http://example.org/t')
     s = Sitemap()
     xml = s.inventory_as_xml(i)
     # Raw strings keep the regex escapes (\w, \:, \-) away from Python's
     # own string-escape handling, which warns about them in plain literals.
     # The lastmod value is matched loosely; only size and md5 are pinned.
     self.assertNotEqual(
         None,
         re.search(
             r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+</lastmod><rs:size>20</rs:size><rs:md5>6bf26fd66601b528d2e0b47eaa87edfd</rs:md5>',
             xml),
         'size/checksum for file_a')
     self.assertNotEqual(
         None,
         re.search(
             r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+</lastmod><rs:size>45</rs:size><rs:md5>452e54bdae1626ac5d6e7be81b39de21</rs:md5>',
             xml),
         'size/checksum for file_b')
Exemple #6
0
 def test2_pretty_output(self):
     # With pretty_xml enabled the sitemap XML for the dir1 test data must
     # match this exact text, one <url> element per line.
     expected_xml = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://resourcesync.org/change/0.1">\n'
         '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-03-14T17:46:04</lastmod><rs:size>20</rs:size></url>\n'
         '<url><loc>http://example.org/t/file_b</loc><lastmod>2012-03-14T17:46:25</lastmod><rs:size>45</rs:size></url>\n'
         '</urlset>'
     )
     builder = InventoryBuilder()
     inventory = builder.from_disk('resync/test/testdata/dir1',
                                   'http://example.org/t')
     sitemap = Sitemap()
     sitemap.pretty_xml = True
     actual_xml = sitemap.inventory_as_xml(inventory)
     self.assertEqual(actual_xml, expected_xml)
Exemple #7
0
    def changeset_sitemap(self, outfile=None, ref_sitemap=None, capabilities=None,
                          dump=None):
        """Write a changeset sitemap relative to a reference sitemap.

        Reads the reference sitemap, compares it with ``self.inventory``
        and writes the updated, deleted and created resources out as a
        new sitemap.

        Arguments:
          outfile      -- basename passed to ``Sitemap.write``; when None
                          the changeset XML is printed to stdout instead
          ref_sitemap  -- passed to ``Sitemap.read`` to load the reference
          capabilities -- value stored on the changeset's ``capabilities``
          dump         -- forwarded, together with the changeset, to
                          ``self.write_dump_if_requested``

        The summary line for the reference sitemap is printed
        unconditionally; the entry listing only when ``self.verbose``.
        """
        # 1. Get and parse reference sitemap
        ref_reader = Sitemap(verbose=self.verbose,
                             allow_multifile=self.allow_multifile,
                             mapper=self.mapper)
        if self.verbose:
            # One-element tuple so a tuple-valued ref_sitemap formats safely
            print("Reading sitemap(s) from %s ..." % (ref_sitemap,))
        ref_inventory = ref_reader.read(ref_sitemap)
        num_entries = len(ref_inventory)
        print("Read reference sitemap with %d entries in %d sitemaps"
              % (num_entries, ref_reader.sitemaps_created))
        if self.verbose:
            # Default display limit is 100; a configured sitemap entry
            # limit replaces it and suppresses the override hint
            to_show = 100
            override_str = ' (override with --max-sitemap-entries)'
            if self.max_sitemap_entries:
                to_show = self.max_sitemap_entries
                override_str = ''
            if num_entries > to_show:
                print("Showing first %d entries sorted by URI%s..."
                      % (to_show, override_str))
            # Count from 1 so the check below stops after to_show entries
            for shown, uri in enumerate(ref_inventory.resource_uris(), 1):
                print(ref_inventory.resources[uri])
                if shown >= to_show:
                    break
        # 2. Set up base_path->base_uri mappings, get inventory from disk
        disk_inventory = self.inventory
        # 3. Calculate changeset (the count of unchanged resources is unused)
        (_num_same, updated, deleted, created) = ref_inventory.compare(
            disk_inventory)
        changeset = Inventory()
        changeset.capabilities = capabilities
        # Updated and created entries are taken from the disk inventory,
        # deleted entries from the reference inventory
        changeset.add(disk_inventory.changeset(updated, changetype='updated'))
        changeset.add(ref_inventory.changeset(deleted, changetype='deleted'))
        changeset.add(disk_inventory.changeset(created, changetype='created'))
        # 4. Write out changeset
        writer = Sitemap(verbose=self.verbose, pretty_xml=True,
                         allow_multifile=self.allow_multifile,
                         mapper=self.mapper)
        if self.max_sitemap_entries is not None:
            writer.max_sitemap_entries = self.max_sitemap_entries
        if outfile is None:
            # Parenthesized single-argument print runs on Python 2 and 3
            print(writer.inventory_as_xml(changeset))
        else:
            writer.write(changeset, basename=outfile)
        self.write_dump_if_requested(changeset, dump)
Exemple #8
0
 def test3_with_md5(self):
     # Build the dir1 inventory with MD5 digests enabled and check that the
     # size and checksum of each file appear in the generated sitemap XML.
     ib = InventoryBuilder(do_md5=True)
     i = ib.from_disk('resync/test/testdata/dir1', 'http://example.org/t')
     s = Sitemap()
     xml = s.inventory_as_xml(i)
     # Raw strings keep the regex escapes (\w, \:, \-) away from Python's
     # own string-escape handling, which warns about them in plain literals.
     # The lastmod value is matched loosely; only size and md5 are pinned.
     self.assertNotEqual(
         None,
         re.search(
             r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+</lastmod><rs:size>20</rs:size><rs:md5>6bf26fd66601b528d2e0b47eaa87edfd</rs:md5>',
             xml), 'size/checksum for file_a')
     self.assertNotEqual(
         None,
         re.search(
             r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+</lastmod><rs:size>45</rs:size><rs:md5>452e54bdae1626ac5d6e7be81b39de21</rs:md5>',
             xml), 'size/checksum for file_b')