def write_sitemap(self, outfile=None, capabilities=None, dump=None):
    """Emit the current inventory as a sitemap.

    The inventory is read from ``self.inventory`` and its ``capabilities``
    attribute is overwritten with the ``capabilities`` argument. When
    ``outfile`` is None the XML is printed to stdout, otherwise it is
    written through ``Sitemap.write`` with ``outfile`` as the basename.
    ``self.write_dump_if_requested`` is always called last with the
    inventory and ``dump``.
    """
    # Set up base_path->base_uri mappings, get inventory from disk
    inventory = self.inventory
    inventory.capabilities = capabilities

    sitemap = Sitemap(
        verbose=self.verbose,
        pretty_xml=True,
        allow_multifile=self.allow_multifile,
        mapper=self.mapper,
    )
    # Only override the Sitemap's own limit when one was configured here.
    if self.max_sitemap_entries is not None:
        sitemap.max_sitemap_entries = self.max_sitemap_entries

    if outfile is not None:
        sitemap.write(inventory, basename=outfile)
    else:
        # Single-argument parenthesized print behaves the same as the
        # print statement under Python 2.
        print(sitemap.inventory_as_xml(inventory))

    self.write_dump_if_requested(inventory, dump)
def changeset_sitemap(self, outfile=None, ref_sitemap=None,
                      capabilities=None, dump=None):
    """Compare a reference sitemap with the current inventory and emit the changes.

    Steps:
      1. Read ``ref_sitemap`` into a reference inventory, always printing
         a summary line (and, when ``self.verbose`` is set, a preview of
         its entries).
      2. Take the current inventory from ``self.inventory``.
      3. Build a changeset inventory holding the updated, deleted and
         created entries, tagged with ``capabilities``.
      4. Print the changeset XML when ``outfile`` is None, otherwise
         write it with ``outfile`` as basename; then call
         ``self.write_dump_if_requested`` with the changeset and ``dump``.
    """
    # 1. Get and parse reference sitemap
    reader = Sitemap(
        verbose=self.verbose,
        allow_multifile=self.allow_multifile,
        mapper=self.mapper,
    )
    if self.verbose:
        print("Reading sitemap(s) from %s ..." % (ref_sitemap))
    reference = reader.read(ref_sitemap)
    num_entries = len(reference)
    print("Read reference sitemap with %d entries in %d sitemaps"
          % (num_entries, reader.sitemaps_created))

    if self.verbose:
        # Preview size: the configured maximum when set (truthy),
        # otherwise a default of 100 plus a hint on how to change it.
        if self.max_sitemap_entries:
            to_show = self.max_sitemap_entries
            override_str = ''
        else:
            to_show = 100
            override_str = ' (override with --max-sitemap-entries)'
        if num_entries > to_show:
            print("Showing first %d entries sorted by URI%s..."
                  % (to_show, override_str))
        # Count from 1 so the loop stops right after printing entry number
        # ``to_show``.
        for shown, uri in enumerate(reference.resource_uris(), 1):
            print(reference.resources[uri])
            if shown >= to_show:
                break

    # 2. Set up base_path->base_uri mappings, get inventory from disk
    disk_inventory = self.inventory

    # 3. Calculate changeset
    _num_same, updated, deleted, created = reference.compare(disk_inventory)
    changeset = Inventory()
    changeset.capabilities = capabilities
    # Updated/created entries are taken from the disk inventory; deleted
    # entries are taken from the reference inventory.
    for source, uris, changetype in (
            (disk_inventory, updated, 'updated'),
            (reference, deleted, 'deleted'),
            (disk_inventory, created, 'created')):
        changeset.add(source.changeset(uris, changetype=changetype))

    # 4. Write out changeset
    writer = Sitemap(
        verbose=self.verbose,
        pretty_xml=True,
        allow_multifile=self.allow_multifile,
        mapper=self.mapper,
    )
    if self.max_sitemap_entries is not None:
        writer.max_sitemap_entries = self.max_sitemap_entries
    if outfile is not None:
        writer.write(changeset, basename=outfile)
    else:
        print(writer.inventory_as_xml(changeset))
    self.write_dump_if_requested(changeset, dump)
def write_sitemap(self, outfile=None, capabilities=None, dump=None):
    """Serialize ``self.inventory`` as a sitemap.

    ``capabilities`` is stored on the inventory object before it is
    serialized. With ``outfile`` left as None the XML goes to stdout;
    otherwise ``Sitemap.write`` is called with ``basename=outfile``.
    Afterwards ``self.write_dump_if_requested`` receives the inventory
    and ``dump``.
    """
    # Set up base_path->base_uri mappings, get inventory from disk
    inv = self.inventory
    inv.capabilities = capabilities

    options = dict(
        verbose=self.verbose,
        pretty_xml=True,
        allow_multifile=self.allow_multifile,
        mapper=self.mapper,
    )
    out = Sitemap(**options)

    # Leave the Sitemap's own limit untouched unless one is set here.
    if self.max_sitemap_entries is not None:
        out.max_sitemap_entries = self.max_sitemap_entries

    to_stdout = outfile is None
    if to_stdout:
        # Parenthesized single-argument print is equivalent to the
        # print statement on Python 2.
        print(out.inventory_as_xml(inv))
    else:
        out.write(inv, basename=outfile)

    self.write_dump_if_requested(inv, dump)
def test2_pretty_output(self):
    """Pretty-printed sitemap XML for the dir1 test data matches exactly.

    The inventory is built through a Mapper pairing the base URI
    ``http://example.org/t`` with the test data directory.
    """
    expected_xml = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://resourcesync.org/change/0.1">\n<url><loc>http://example.org/t/file_a</loc><lastmod>2012-03-14T17:46:04</lastmod><rs:size>20</rs:size></url>\n<url><loc>http://example.org/t/file_b</loc><lastmod>2012-03-14T17:46:25</lastmod><rs:size>45</rs:size></url>\n</urlset>'

    builder = InventoryBuilder()
    builder.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
    inventory = builder.from_disk()

    sitemap = Sitemap()
    sitemap.pretty_xml = True
    actual_xml = sitemap.inventory_as_xml(inventory)

    self.assertEqual(actual_xml, expected_xml)
def test3_with_md5(self):
    """Sitemap XML built with ``do_md5=True`` carries size and md5 per file.

    For each of the two test files the serialized XML must contain a
    ``<loc>``, a ``<lastmod>`` (any value made of word characters, ':'
    and '-'), the expected ``<rs:size>`` and the expected ``<rs:md5>``.
    """
    ib = InventoryBuilder(do_md5=True)
    i = ib.from_disk('resync/test/testdata/dir1', 'http://example.org/t')
    s = Sitemap()
    xml = s.inventory_as_xml(i)
    # Patterns are raw strings so that \w, \: and \- reach the regex
    # engine untouched instead of being parsed as (invalid) string
    # escapes. The dot in the hostname is escaped so it only matches a
    # literal '.'.
    self.assertNotEqual(
        None,
        re.search(
            r'<loc>http://example\.org/t/file_a</loc>'
            r'<lastmod>[\w\:\-]+</lastmod>'
            r'<rs:size>20</rs:size>'
            r'<rs:md5>6bf26fd66601b528d2e0b47eaa87edfd</rs:md5>',
            xml),
        'size/checksum for file_a')
    self.assertNotEqual(
        None,
        re.search(
            r'<loc>http://example\.org/t/file_b</loc>'
            r'<lastmod>[\w\:\-]+</lastmod>'
            r'<rs:size>45</rs:size>'
            r'<rs:md5>452e54bdae1626ac5d6e7be81b39de21</rs:md5>',
            xml),
        'size/checksum for file_b')
def test2_pretty_output(self):
    """Pretty-printed sitemap XML for the dir1 test data matches exactly.

    The inventory is built by calling ``from_disk`` with the test data
    directory and the base URI ``http://example.org/t``.
    """
    expected_xml = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://resourcesync.org/change/0.1">\n<url><loc>http://example.org/t/file_a</loc><lastmod>2012-03-14T17:46:04</lastmod><rs:size>20</rs:size></url>\n<url><loc>http://example.org/t/file_b</loc><lastmod>2012-03-14T17:46:25</lastmod><rs:size>45</rs:size></url>\n</urlset>'

    inventory = InventoryBuilder().from_disk(
        'resync/test/testdata/dir1', 'http://example.org/t')

    sitemap = Sitemap()
    sitemap.pretty_xml = True

    self.assertEqual(sitemap.inventory_as_xml(inventory), expected_xml)
def changeset_sitemap(self, outfile=None, ref_sitemap=None, capabilities=None,
                      dump=None):
    """Write a changeset sitemap relative to a reference sitemap.

    The reference sitemap at ``ref_sitemap`` is read and compared with
    ``self.inventory``. Entries reported as updated or created are taken
    from the current inventory, entries reported as deleted from the
    reference. The resulting changeset (tagged with ``capabilities``) is
    printed as XML when ``outfile`` is None, or written with ``outfile``
    as basename otherwise. ``self.write_dump_if_requested`` is called
    last with the changeset and ``dump``.
    """
    be_verbose_kwargs = dict(
        allow_multifile=self.allow_multifile,
        mapper=self.mapper,
    )

    # 1. Get and parse reference sitemap
    ref_reader = Sitemap(verbose=self.verbose, **be_verbose_kwargs)
    if self.verbose:
        print("Reading sitemap(s) from %s ..." % (ref_sitemap))
    ref_inventory = ref_reader.read(ref_sitemap)
    num_entries = len(ref_inventory)
    print("Read reference sitemap with %d entries in %d sitemaps" % (
        num_entries, ref_reader.sitemaps_created))

    if self.verbose:
        # Defaults first; a truthy configured maximum replaces them.
        to_show, override_str = 100, ' (override with --max-sitemap-entries)'
        if self.max_sitemap_entries:
            to_show, override_str = self.max_sitemap_entries, ''
        if num_entries > to_show:
            print("Showing first %d entries sorted by URI%s..." % (
                to_show, override_str))
        printed = 0
        for uri in ref_inventory.resource_uris():
            print(ref_inventory.resources[uri])
            printed += 1
            # Stop once the preview limit has been reached.
            if printed >= to_show:
                break

    # 2. Set up base_path->base_uri mappings, get inventory from disk
    disk_inventory = self.inventory

    # 3. Calculate changeset
    comparison = ref_inventory.compare(disk_inventory)
    (_num_same, updated, deleted, created) = comparison
    changeset = Inventory()
    changeset.capabilities = capabilities
    updated_part = disk_inventory.changeset(updated, changetype='updated')
    changeset.add(updated_part)
    deleted_part = ref_inventory.changeset(deleted, changetype='deleted')
    changeset.add(deleted_part)
    created_part = disk_inventory.changeset(created, changetype='created')
    changeset.add(created_part)

    # 4. Write out changeset
    out = Sitemap(verbose=self.verbose, pretty_xml=True, **be_verbose_kwargs)
    if self.max_sitemap_entries is not None:
        out.max_sitemap_entries = self.max_sitemap_entries
    if outfile is None:
        print(out.inventory_as_xml(changeset))
    else:
        out.write(changeset, basename=outfile)
    self.write_dump_if_requested(changeset, dump)
def test3_with_md5(self):
    """Sitemap XML built with ``do_md5=True`` carries size and md5 per file.

    Each test file must appear in the serialized XML with its ``<loc>``,
    some ``<lastmod>`` value (word characters, ':' and '-'), and the
    expected ``<rs:size>`` and ``<rs:md5>`` values.
    """
    ib = InventoryBuilder(do_md5=True)
    i = ib.from_disk('resync/test/testdata/dir1', 'http://example.org/t')
    s = Sitemap()
    xml = s.inventory_as_xml(i)
    # Raw-string template: \w, \: and \- are regex escapes, not string
    # escapes, and the hostname dot is escaped to match only a literal '.'.
    entry_pattern = (
        r'<loc>http://example\.org/t/%s</loc>'
        r'<lastmod>[\w\:\-]+</lastmod>'
        r'<rs:size>%s</rs:size>'
        r'<rs:md5>%s</rs:md5>'
    )
    expected_entries = (
        ('file_a', '20', '6bf26fd66601b528d2e0b47eaa87edfd'),
        ('file_b', '45', '452e54bdae1626ac5d6e7be81b39de21'),
    )
    for name, size, md5 in expected_entries:
        self.assertNotEqual(
            None,
            re.search(entry_pattern % (name, size, md5), xml),
            'size/checksum for %s' % name)