Exemple #1
0
 def write_static_inventory(self):
     """Generate the inventory and publish it as static sitemap files.

     The sitemap is first written into ``Source.TEMP_FILE_PATH``; the old
     sitemap files in ``Source.STATIC_FILE_PATH`` are then removed, the new
     ones moved into place and the temp directory deleted.  Finally the
     elapsed time is logged and the source's observers are notified with
     an "UPDATED" ``ResourceChange``.
     """
     started = time.time()
     temp_dir = Source.TEMP_FILE_PATH

     # Stage the freshly generated sitemap in the temp directory.
     self.ensure_temp_dir(temp_dir)
     inventory = self.generate()
     sitemap = Sitemap()
     sitemap.max_sitemap_entries = self.config['max_sitemap_entries']
     sitemap.mapper = Mapper([self.source.base_uri, temp_dir])
     sitemap.write(inventory, temp_dir + "/sitemap.xml")

     # Swap the staged files in for the old ones, then drop the temp dir.
     static_dir = Source.STATIC_FILE_PATH
     self.rm_sitemap_files(static_dir)
     self.mv_sitemap_files(temp_dir, static_dir)
     shutil.rmtree(temp_dir)
     finished = time.time()

     # Log the end of the sitemap write and tell the observers about it.
     sitemap_size = self.compute_sitemap_size(static_dir)
     timing = {
         'time': (finished - started),
         'no_resources': self.source.resource_count
     }
     self.logger.info("Wrote static sitemap inventory. %s" % timing)
     # The change event wraps a second ResourceChange describing the
     # sitemap itself (uri, size, and the time the write started).
     sitemap_resource = ResourceChange(
         self.uri, size=sitemap_size, timestamp=started)
     sm_write_end = ResourceChange(resource=sitemap_resource,
                                   changetype="UPDATED")
     self.source.notify_observers(sm_write_end)
Exemple #2
0
 def changeset_sitemap(
     self, outfile=None, ref_sitemap=None, newref_sitemap=None, empty=None, capabilities=None, dump=None
 ):
     """Build a changeset and emit it as a sitemap.

     Unless ``empty`` is truthy, the reference sitemap ``ref_sitemap`` is
     compared against either ``newref_sitemap`` (when given) or
     ``self.inventory``, and the updated/deleted/created resources are
     added to the changeset.  The result is printed as XML when
     ``outfile`` is None, otherwise written using ``outfile`` as basename.
     ``capabilities`` is stored on the changeset and ``dump`` is handed to
     ``write_dump_if_requested``.
     """
     changeset = ChangeSet()
     changeset.capabilities = capabilities
     if not empty:
         # Old side of the comparison: the reference sitemap.
         old_inv = self.read_reference_sitemap(ref_sitemap)
         # New side: a second reference sitemap if given, else self.inventory.
         new_inv = (
             self.inventory
             if newref_sitemap is None
             else self.read_reference_sitemap(newref_sitemap, name="new reference")
         )
         _same, updated, deleted, created = old_inv.compare(new_inv)
         for resources, changetype in (
             (updated, "UPDATED"),
             (deleted, "DELETED"),
             (created, "CREATED"),
         ):
             changeset.add_changed_resources(resources, changetype=changetype)
     # Emit the changeset: stdout when no outfile was given, file(s) otherwise.
     sitemap = Sitemap(pretty_xml=True, allow_multifile=self.allow_multifile, mapper=self.mapper)
     if self.max_sitemap_entries is not None:
         sitemap.max_sitemap_entries = self.max_sitemap_entries
     if outfile is not None:
         sitemap.write(changeset, basename=outfile, changeset=True)
     else:
         print(sitemap.resources_as_xml(changeset, changeset=True))
     self.write_dump_if_requested(changeset, dump)
Exemple #3
0
 def write_static_inventory(self):
     """Write the generated inventory to the filesystem as a static sitemap.

     Steps, in order: build the sitemap inside ``Source.TEMP_FILE_PATH``,
     replace the sitemap files in ``Source.STATIC_FILE_PATH`` with the new
     ones, remove the temp directory, log timing data, and notify the
     source's observers that the sitemap was updated.
     """
     t_start = time.time()

     # --- build the sitemap in the temp directory -----------------------
     self.ensure_temp_dir(Source.TEMP_FILE_PATH)
     generated = self.generate()
     target = Source.TEMP_FILE_PATH + "/sitemap.xml"
     writer = Sitemap()
     writer.max_sitemap_entries = self.config['max_sitemap_entries']
     writer.mapper = Mapper([self.source.base_uri, Source.TEMP_FILE_PATH])
     writer.write(generated, target)

     # --- replace the published files, clean up -------------------------
     self.rm_sitemap_files(Source.STATIC_FILE_PATH)
     self.mv_sitemap_files(Source.TEMP_FILE_PATH, Source.STATIC_FILE_PATH)
     shutil.rmtree(Source.TEMP_FILE_PATH)
     t_end = time.time()

     # --- log and notify ------------------------------------------------
     size = self.compute_sitemap_size(Source.STATIC_FILE_PATH)
     log_data = {'time': (t_end - t_start),
                 'no_resources': self.source.resource_count}
     message = "Wrote static sitemap inventory. %s" % log_data
     self.logger.info(message)
     # Inner ResourceChange describes the sitemap; the outer one carries
     # it as ``resource`` with changetype "UPDATED".
     inner = ResourceChange(self.uri, size=size, timestamp=t_start)
     self.source.notify_observers(
         ResourceChange(resource=inner, changetype="UPDATED"))
Exemple #4
0
 def changelist_sitemap(self,outfile=None,ref_sitemap=None,newref_sitemap=None,
                       empty=None,capabilities=None,dump=None):
     """Build a change list and emit it as a sitemap.

     Unless ``empty`` is truthy, the reference sitemap ``ref_sitemap`` is
     compared against ``newref_sitemap`` (when given) or against
     ``self.resourcelist``; the updated, deleted and created resources
     are added to the change list.  Output goes to stdout when ``outfile``
     is None, otherwise it is written with ``outfile`` as basename.
     """
     changelist = ChangeList()
     changelist.capabilities = capabilities
     if not empty:
         # Reference (old) state.
         old_inv = self.read_reference_sitemap(ref_sitemap)
         # New state: explicit second sitemap, or self.resourcelist.
         if newref_sitemap is not None:
             new_inv = self.read_reference_sitemap(newref_sitemap, name='new reference')
         else:
             new_inv = self.resourcelist
         # Record each kind of difference on the change list.
         _same, updated, deleted, created = old_inv.compare(new_inv)
         for resources, change in ((updated, 'updated'),
                                   (deleted, 'deleted'),
                                   (created, 'created')):
             changelist.add_changed_resources(resources, change=change)
     # Serialise the change list.
     sitemap = Sitemap(pretty_xml=True, allow_multifile=self.allow_multifile, mapper=self.mapper)
     if self.max_sitemap_entries is not None:
         sitemap.max_sitemap_entries = self.max_sitemap_entries
     if outfile is not None:
         sitemap.write(changelist, basename=outfile, changelist=True)
     else:
         print(sitemap.resources_as_xml(changelist, changelist=True))
     self.write_dump_if_requested(changelist, dump)
Exemple #5
0
 def write_static_inventory(self):
     """Regenerate the inventory and write it as a static sitemap.

     Calls ``generate()`` and ``delete_sitemap_files()``, then writes this
     object (``self`` is what is passed to ``Sitemap.write``) to
     ``Source.STATIC_FILE_PATH + "/sitemap.xml"`` and prints how long the
     write took.
     """
     self.generate()
     self.delete_sitemap_files()
     static_dir = Source.STATIC_FILE_PATH
     started = time.time()
     sitemap = Sitemap()
     sitemap.max_sitemap_entries = self.config['max_sitemap_entries']
     sitemap.mapper = Mapper([self.source.base_uri, static_dir])
     sitemap.write(self, static_dir + "/sitemap.xml")
     elapsed = time.time() - started
     print("Wrote static sitemap in %s seconds" % str(elapsed))
Exemple #6
0
 def write_sitemap(self, outfile=None, capabilities=None, dump=None):
     """Emit ``self.inventory`` as a sitemap.

     ``capabilities`` is stored on the inventory before it is serialised.
     With ``outfile`` left as None the XML is printed; otherwise it is
     written using ``outfile`` as basename.  ``dump`` is passed on to
     ``write_dump_if_requested``.
     """
     inventory = self.inventory
     inventory.capabilities = capabilities
     sitemap = Sitemap(
         pretty_xml=True,
         allow_multifile=self.allow_multifile,
         mapper=self.mapper,
     )
     # Only override the Sitemap's own entry limit when one is configured.
     if self.max_sitemap_entries is not None:
         sitemap.max_sitemap_entries = self.max_sitemap_entries
     if outfile is not None:
         sitemap.write(inventory, basename=outfile)
     else:
         print(sitemap.resources_as_xml(inventory, capabilities=inventory.capabilities))
     self.write_dump_if_requested(inventory, dump)
Exemple #7
0
 def write_changeset(self):
     """Write the generated changeset to a sitemap file in the static path.

     The changeset returned by ``self.generate()`` is written to
     ``Source.STATIC_FILE_PATH + "/" + self.current_changeset_file()``;
     afterwards ``previous_changeset_id`` is incremented by one and an
     info message is logged.

     NOTE(review): the previous docstring said this "empties the cache";
     nothing in this method does so directly -- presumably ``generate()``
     is responsible. Confirm against its implementation.
     """
     changeset = self.generate()
     basename = Source.STATIC_FILE_PATH + "/" + self.current_changeset_file()
     sitemap = Sitemap()
     sitemap.max_sitemap_entries = self.config['max_sitemap_entries']
     sitemap.mapper = Mapper([self.source.base_uri, Source.STATIC_FILE_PATH])
     sitemap.write(changeset, basename)
     # The id is bumped only after a successful write.
     self.previous_changeset_id += 1
     self.logger.info("Wrote static changeset..")
Exemple #8
0
 def changeset_sitemap(self,
                       outfile=None,
                       ref_sitemap=None,
                       capabilities=None,
                       dump=None):
     """Compare a reference sitemap with ``self.inventory`` and emit the changes.

     The sitemap(s) at ``ref_sitemap`` are read, optionally listed when
     ``self.verbose`` is set, and compared with ``self.inventory``.  The
     updated, deleted and created entries are collected in a new
     ``Inventory`` which is printed as XML when ``outfile`` is None, or
     written using ``outfile`` as basename otherwise.  ``dump`` is passed
     on to ``write_dump_if_requested``.
     """
     # 1. Read and parse the reference sitemap
     reader = Sitemap(verbose=self.verbose,
                      allow_multifile=self.allow_multifile,
                      mapper=self.mapper)
     if self.verbose:
         print("Reading sitemap(s) from %s ..." % ref_sitemap)
     ref_inventory = reader.read(ref_sitemap)
     num_entries = len(ref_inventory)
     print("Read reference sitemap with %d entries in %d sitemaps" % (
         num_entries, reader.sitemaps_created))
     if self.verbose:
         # Show at most max_sitemap_entries entries, or 100 when unset.
         if self.max_sitemap_entries:
             to_show = self.max_sitemap_entries
             override_str = ''
         else:
             to_show = 100
             override_str = ' (override with --max-sitemap-entries)'
         if num_entries > to_show:
             print("Showing first %d entries sorted by URI%s..." % (
                 to_show, override_str))
         for shown, uri in enumerate(ref_inventory.resource_uris(), 1):
             print(ref_inventory.resources[uri])
             if shown >= to_show:
                 break
     # 2. Current state comes from self.inventory
     disk_inventory = self.inventory
     # 3. Work out the changeset
     (num_same, updated, deleted, created) = ref_inventory.compare(disk_inventory)
     changeset = Inventory()
     changeset.capabilities = capabilities
     # Deleted entries exist only in the reference inventory, so they are
     # taken from there; updated/created ones come from the disk inventory.
     for origin, uris, changetype in (
             (disk_inventory, updated, 'updated'),
             (ref_inventory, deleted, 'deleted'),
             (disk_inventory, created, 'created')):
         changeset.add(origin.changeset(uris, changetype=changetype))
     # 4. Emit the changeset
     writer = Sitemap(verbose=self.verbose,
                      pretty_xml=True,
                      allow_multifile=self.allow_multifile,
                      mapper=self.mapper)
     if self.max_sitemap_entries is not None:
         writer.max_sitemap_entries = self.max_sitemap_entries
     if outfile is not None:
         writer.write(changeset, basename=outfile)
     else:
         print(writer.inventory_as_xml(changeset))
     self.write_dump_if_requested(changeset, dump)
Exemple #9
0
 def write_sitemap(self, outfile=None, capabilities=None, dump=None):
     """Serialise ``self.inventory`` as a sitemap.

     The given ``capabilities`` are attached to the inventory first.  The
     XML goes to stdout if ``outfile`` is None, else to file(s) named
     after ``outfile``.  ``write_dump_if_requested`` is always called
     last with the inventory and ``dump``.
     """
     inv = self.inventory
     inv.capabilities = capabilities
     writer = Sitemap(pretty_xml=True, allow_multifile=self.allow_multifile,
                      mapper=self.mapper)
     # Leave the Sitemap's own limit alone unless one was configured here.
     if self.max_sitemap_entries is not None:
         writer.max_sitemap_entries = self.max_sitemap_entries
     if outfile is not None:
         writer.write(inv, basename=outfile)
     else:
         xml = writer.resources_as_xml(inv, capabilities=inv.capabilities)
         print(xml)
     self.write_dump_if_requested(inv, dump)
 def write_changeset(self):
     """Write the generated changeset to a sitemap file and log the duration.

     The changeset returned by ``self.generate()`` is written to
     ``Source.STATIC_FILE_PATH + "/" + self.current_changeset_file()``.
     Afterwards ``previous_changeset_id`` is incremented by one and the
     elapsed wall-clock time is logged under the ``'time'`` key.

     NOTE(review): the previous docstring said this "empties the cache";
     nothing in this method does so directly -- presumably ``generate()``
     is responsible. Confirm against its implementation.
     """
     started = time.time()
     changeset = self.generate()
     basename = Source.STATIC_FILE_PATH + "/" + self.current_changeset_file()
     sitemap = Sitemap()
     sitemap.max_sitemap_entries = self.config['max_sitemap_entries']
     sitemap.mapper = Mapper([self.source.base_uri, Source.STATIC_FILE_PATH])
     sitemap.write(changeset, basename)
     # The elapsed time used to be measured and then thrown away, so the
     # log line always printed an empty dict; report it instead.
     log_data = {'time': (time.time() - started)}
     self.previous_changeset_id += 1
     self.logger.info("Wrote static changeset. %s" % log_data)
Exemple #11
0
    def changeset_sitemap(self,outfile=None,ref_sitemap=None,capabilities=None,
                          dump=None):
        """Emit the differences between a reference sitemap and ``self.inventory``.

        Reads the sitemap(s) at ``ref_sitemap`` (listing the entries when
        ``self.verbose`` is set), compares them with ``self.inventory`` and
        gathers the updated, deleted and created entries in a fresh
        ``Inventory``.  That inventory is printed as XML when ``outfile``
        is None, otherwise written with ``outfile`` as basename; ``dump``
        is handed to ``write_dump_if_requested``.
        """
        verbose = self.verbose
        # 1. Load the reference sitemap
        ref_reader = Sitemap(verbose=verbose, allow_multifile=self.allow_multifile,
                             mapper=self.mapper)
        if verbose:
            print("Reading sitemap(s) from %s ..." % ref_sitemap)
        reference = ref_reader.read(ref_sitemap)
        num_entries = len(reference)
        print("Read reference sitemap with %d entries in %d sitemaps" % (num_entries, ref_reader.sitemaps_created))
        if self.verbose:
            # Listing is capped at max_sitemap_entries, or 100 if that is unset.
            to_show = 100
            override_str = ' (override with --max-sitemap-entries)'
            if self.max_sitemap_entries:
                to_show, override_str = self.max_sitemap_entries, ''
            if num_entries > to_show:
                print("Showing first %d entries sorted by URI%s..." % (to_show, override_str))
            for count, uri in enumerate(reference.resource_uris(), 1):
                print(reference.resources[uri])
                if count >= to_show:
                    break
        # 2. The current state is self.inventory
        current = self.inventory
        # 3. Build the changeset inventory
        (num_same, updated, deleted, created) = reference.compare(current)
        changeset = Inventory()
        changeset.capabilities = capabilities
        updated_part = current.changeset(updated, changetype='updated')
        changeset.add(updated_part)
        # Deleted entries are looked up in the reference inventory.
        deleted_part = reference.changeset(deleted, changetype='deleted')
        changeset.add(deleted_part)
        created_part = current.changeset(created, changetype='created')
        changeset.add(created_part)
        # 4. Output
        out = Sitemap(verbose=self.verbose, pretty_xml=True,
                      allow_multifile=self.allow_multifile, mapper=self.mapper)
        if self.max_sitemap_entries is not None:
            out.max_sitemap_entries = self.max_sitemap_entries
        if outfile is not None:
            out.write(changeset, basename=outfile)
        else:
            print(out.inventory_as_xml(changeset))
        self.write_dump_if_requested(changeset, dump)
Exemple #12
0
 def changeset_sitemap(self,
                       outfile=None,
                       ref_sitemap=None,
                       newref_sitemap=None,
                       empty=None,
                       capabilities=None,
                       dump=None):
     """Create a ``ChangeSet`` and emit it as a sitemap.

     When ``empty`` is truthy the changeset carries only ``capabilities``.
     Otherwise ``ref_sitemap`` is read and compared with ``newref_sitemap``
     if one is given, or with ``self.inventory`` if not, and the resulting
     updated/deleted/created resources are added.  The changeset is
     printed as XML when ``outfile`` is None, else written with ``outfile``
     as basename; ``dump`` goes to ``write_dump_if_requested``.
     """
     changeset = ChangeSet()
     changeset.capabilities = capabilities
     if not empty:
         # 1. Reference (old) inventory
         reference = self.read_reference_sitemap(ref_sitemap)
         # 2. New inventory: second reference sitemap, or self.inventory
         if newref_sitemap is not None:
             latest = self.read_reference_sitemap(newref_sitemap,
                                                  name='new reference')
         else:
             latest = self.inventory
         # 3. Record the differences by change type
         diff = reference.compare(latest)
         (same, updated, deleted, created) = diff
         by_type = (('UPDATED', updated),
                    ('DELETED', deleted),
                    ('CREATED', created))
         for changetype, resources in by_type:
             changeset.add_changed_resources(resources, changetype=changetype)
     # 4. Emit the changeset
     writer = Sitemap(pretty_xml=True,
                      allow_multifile=self.allow_multifile,
                      mapper=self.mapper)
     if self.max_sitemap_entries is not None:
         writer.max_sitemap_entries = self.max_sitemap_entries
     if outfile is not None:
         writer.write(changeset, basename=outfile, changeset=True)
     else:
         print(writer.resources_as_xml(changeset, changeset=True))
     self.write_dump_if_requested(changeset, dump)