def write_static_inventory(self):
    """Regenerate the static sitemap inventory and publish it on disk.

    The sitemap is first written into ``Source.TEMP_FILE_PATH``. The
    sitemap files under ``Source.STATIC_FILE_PATH`` are then removed, the
    new files are moved into place and the temp directory is deleted.
    Afterwards the elapsed time and resource count are logged, and the
    observers of ``self.source`` are notified with an "UPDATED"
    ``ResourceChange`` describing the sitemap.
    """
    # Build the new sitemap inside the temp directory.
    started = time.time()
    temp_dir = Source.TEMP_FILE_PATH
    self.ensure_temp_dir(temp_dir)
    inventory = self.generate()

    writer = Sitemap()
    writer.max_sitemap_entries = self.config['max_sitemap_entries']
    writer.mapper = Mapper([self.source.base_uri, temp_dir])
    writer.write(inventory, temp_dir + "/sitemap.xml")

    # Swap the published files for the new ones, then drop the temp dir.
    static_dir = Source.STATIC_FILE_PATH
    self.rm_sitemap_files(static_dir)
    self.mv_sitemap_files(temp_dir, static_dir)
    shutil.rmtree(temp_dir)
    finished = time.time()

    # Report the completed write: one log entry plus an observer event.
    sitemap_size = self.compute_sitemap_size(static_dir)
    log_data = {
        'time': (finished - started),
        'no_resources': self.source.resource_count,
    }
    self.logger.info("Wrote static sitemap inventory. %s" % log_data)
    written = ResourceChange(self.uri, size=sitemap_size, timestamp=started)
    sm_write_end = ResourceChange(resource=written, changetype="UPDATED")
    self.source.notify_observers(sm_write_end)
def changeset_sitemap(self, outfile=None, ref_sitemap=None,
                      newref_sitemap=None, empty=None, capabilities=None,
                      dump=None):
    """Compute a changeset between two inventories and emit it as a sitemap.

    Unless ``empty`` is truthy, the reference sitemap ``ref_sitemap`` is
    compared with either ``newref_sitemap`` (when given) or
    ``self.inventory``, and the updated, deleted and created resources are
    added to the changeset. With ``empty`` set, the changeset is emitted
    without any resources.

    Args:
        outfile: basename to write to; when None the XML is printed.
        ref_sitemap: location of the old (reference) sitemap.
        newref_sitemap: optional location of a second sitemap to compare
            against instead of ``self.inventory``.
        empty: when truthy, skip the comparison entirely.
        capabilities: stored on the changeset's ``capabilities`` attribute.
        dump: forwarded to ``self.write_dump_if_requested``.
    """
    changeset = ChangeSet()
    changeset.capabilities = capabilities
    if not empty:
        # 1. Get and parse reference sitemap
        old_inv = self.read_reference_sitemap(ref_sitemap)
        # 2. Depending on whether a newref_sitemap was specified, either
        #    read that or use the inventory held by this object
        if newref_sitemap is None:
            new_inv = self.inventory
        else:
            new_inv = self.read_reference_sitemap(newref_sitemap,
                                                  name="new reference")
        # 3. Calculate changeset; unchanged resources are not recorded
        _same, updated, deleted, created = old_inv.compare(new_inv)
        for resources, changetype in ((updated, "UPDATED"),
                                      (deleted, "DELETED"),
                                      (created, "CREATED")):
            changeset.add_changed_resources(resources, changetype=changetype)
    # 4. Write out changeset
    s = Sitemap(pretty_xml=True, allow_multifile=self.allow_multifile,
                mapper=self.mapper)
    if self.max_sitemap_entries is not None:
        s.max_sitemap_entries = self.max_sitemap_entries
    if outfile is None:
        # print(...) with one argument gives the same output as the
        # Python 2 print statement and is also valid Python 3.
        print(s.resources_as_xml(changeset, changeset=True))
    else:
        s.write(changeset, basename=outfile, changeset=True)
    self.write_dump_if_requested(changeset, dump)
def write_static_inventory(self):
    """Write a freshly generated sitemap inventory to the filesystem.

    Steps, in order: stage the sitemap under ``Source.TEMP_FILE_PATH``,
    remove the sitemap files in ``Source.STATIC_FILE_PATH``, move the
    staged files there, delete the temp directory, log the timing, and
    notify the observers of ``self.source`` with an "UPDATED" change.
    """
    start_time = time.time()

    # Stage the new sitemap under the temp path.
    self.ensure_temp_dir(Source.TEMP_FILE_PATH)
    generated = self.generate()
    staged_file = Source.TEMP_FILE_PATH + "/sitemap.xml"
    sitemap = Sitemap()
    sitemap.max_sitemap_entries = self.config['max_sitemap_entries']
    sitemap.mapper = Mapper([self.source.base_uri, Source.TEMP_FILE_PATH])
    sitemap.write(generated, staged_file)

    # Replace the old sitemap files with the staged ones and clean up.
    self.rm_sitemap_files(Source.STATIC_FILE_PATH)
    self.mv_sitemap_files(Source.TEMP_FILE_PATH, Source.STATIC_FILE_PATH)
    shutil.rmtree(Source.TEMP_FILE_PATH)
    elapsed = time.time() - start_time

    # Log the completed write and tell the observers about it.
    sitemap_size = self.compute_sitemap_size(Source.STATIC_FILE_PATH)
    log_data = {'time': elapsed,
                'no_resources': self.source.resource_count}
    self.logger.info("Wrote static sitemap inventory. %s" % log_data)
    self.source.notify_observers(
        ResourceChange(
            resource=ResourceChange(self.uri,
                                    size=sitemap_size,
                                    timestamp=start_time),
            changetype="UPDATED"))
def changelist_sitemap(self, outfile=None, ref_sitemap=None,
                       newref_sitemap=None, empty=None, capabilities=None,
                       dump=None):
    """Compute a change list between two resource lists and emit it.

    Unless ``empty`` is truthy, the reference sitemap ``ref_sitemap`` is
    compared with either ``newref_sitemap`` (when given) or
    ``self.resourcelist``, and the updated, deleted and created resources
    are added to the change list. With ``empty`` set, the change list is
    emitted without any resources.

    Args:
        outfile: basename to write to; when None the XML is printed.
        ref_sitemap: location of the old (reference) sitemap.
        newref_sitemap: optional location of a second sitemap to compare
            against instead of ``self.resourcelist``.
        empty: when truthy, skip the comparison entirely.
        capabilities: stored on the change list's ``capabilities``
            attribute.
        dump: forwarded to ``self.write_dump_if_requested``.
    """
    changelist = ChangeList()
    changelist.capabilities = capabilities
    if not empty:
        # 1. Get and parse reference sitemap
        old_inv = self.read_reference_sitemap(ref_sitemap)
        # 2. Depending on whether a newref_sitemap was specified, either
        #    read that or use the resourcelist held by this object
        if newref_sitemap is None:
            new_inv = self.resourcelist
        else:
            new_inv = self.read_reference_sitemap(newref_sitemap,
                                                  name='new reference')
        # 3. Calculate changelist; unchanged resources are not recorded
        _same, updated, deleted, created = old_inv.compare(new_inv)
        changelist.add_changed_resources(updated, change='updated')
        changelist.add_changed_resources(deleted, change='deleted')
        changelist.add_changed_resources(created, change='created')
    # 4. Write out changelist
    s = Sitemap(pretty_xml=True, allow_multifile=self.allow_multifile,
                mapper=self.mapper)
    if self.max_sitemap_entries is not None:
        s.max_sitemap_entries = self.max_sitemap_entries
    if outfile is None:
        # print(...) with one argument gives the same output as the
        # Python 2 print statement and is also valid Python 3.
        print(s.resources_as_xml(changelist, changelist=True))
    else:
        s.write(changelist, basename=outfile, changelist=True)
    self.write_dump_if_requested(changelist, dump)
def write_static_inventory(self):
    """Write the inventory as a sitemap under ``Source.STATIC_FILE_PATH``.

    Calls ``self.generate()`` and ``self.delete_sitemap_files()`` first,
    then writes ``self`` to ``<STATIC_FILE_PATH>/sitemap.xml`` and prints
    how long the write took.
    """
    # NOTE(review): the return value of generate() is ignored and `self`
    # is what gets written - presumably generate() populates this object;
    # confirm against its definition.
    self.generate()
    self.delete_sitemap_files()
    basename = Source.STATIC_FILE_PATH + "/sitemap.xml"

    # Only the sitemap write itself is timed.
    then = time.time()
    s = Sitemap()
    s.max_sitemap_entries = self.config['max_sitemap_entries']
    s.mapper = Mapper([self.source.base_uri, Source.STATIC_FILE_PATH])
    s.write(self, basename)
    now = time.time()
    # print(...) with one argument gives the same output as the Python 2
    # print statement and is also valid Python 3.
    print("Wrote static sitemap in %s seconds" % str(now - then))
def write_sitemap(self, outfile=None, capabilities=None, dump=None):
    """Emit ``self.inventory`` as a sitemap.

    Args:
        outfile: basename to write to; when None the XML is printed.
        capabilities: stored on the inventory's ``capabilities``
            attribute before it is serialized.
        dump: forwarded to ``self.write_dump_if_requested``.
    """
    # Fetch the inventory and attach the requested capabilities
    inventory = self.inventory
    inventory.capabilities = capabilities
    s = Sitemap(pretty_xml=True, allow_multifile=self.allow_multifile,
                mapper=self.mapper)
    if self.max_sitemap_entries is not None:
        s.max_sitemap_entries = self.max_sitemap_entries
    if outfile is None:
        # print(...) with one argument gives the same output as the
        # Python 2 print statement and is also valid Python 3.
        print(s.resources_as_xml(inventory,
                                 capabilities=inventory.capabilities))
    else:
        s.write(inventory, basename=outfile)
    self.write_dump_if_requested(inventory, dump)
def write_changeset(self):
    """Write the changeset returned by ``self.generate()`` to a file.

    The target is ``Source.STATIC_FILE_PATH`` joined with the name from
    ``self.current_changeset_file()``. After writing,
    ``self.previous_changeset_id`` is incremented by one.

    NOTE(review): the original docstring also said this "empties the
    cache"; nothing in this method does so directly - presumably
    ``generate()`` takes care of it; confirm.
    """
    changeset = self.generate()
    basename = Source.STATIC_FILE_PATH + "/" + self.current_changeset_file()
    s = Sitemap()
    s.max_sitemap_entries = self.config['max_sitemap_entries']
    s.mapper = Mapper([self.source.base_uri, Source.STATIC_FILE_PATH])
    s.write(changeset, basename)
    # Advance the id only after the file has been written.
    self.previous_changeset_id += 1
    self.logger.info("Wrote static changeset..")
def changeset_sitemap(self, outfile=None, ref_sitemap=None, capabilities=None,
                      dump=None):
    """Compare a reference sitemap with ``self.inventory`` and emit the diff.

    The reference sitemap is read from ``ref_sitemap`` and compared with
    ``self.inventory``. Updated, deleted and created resources are
    collected into a new ``Inventory`` that is then emitted as a sitemap.
    In verbose mode progress messages and a listing of up to 100
    reference entries are printed; ``self.max_sitemap_entries`` overrides
    that limit when it is set.

    Args:
        outfile: basename to write to; when None the XML is printed.
        ref_sitemap: location of the reference sitemap to read.
        capabilities: stored on the changeset's ``capabilities`` attribute.
        dump: forwarded to ``self.write_dump_if_requested``.
    """
    # 1. Get and parse reference sitemap
    rs = Sitemap(verbose=self.verbose, allow_multifile=self.allow_multifile,
                 mapper=self.mapper)
    if self.verbose:
        # One-element tuple so a tuple-valued ref_sitemap is formatted as
        # a single value instead of being unpacked as format arguments.
        print("Reading sitemap(s) from %s ..." % (ref_sitemap,))
    ri = rs.read(ref_sitemap)
    num_entries = len(ri)
    print("Read reference sitemap with %d entries in %d sitemaps" % (
        num_entries, rs.sitemaps_created))
    if self.verbose:
        to_show = 100
        override_str = ' (override with --max-sitemap-entries)'
        if self.max_sitemap_entries:
            to_show = self.max_sitemap_entries
            override_str = ''
        if num_entries > to_show:
            print("Showing first %d entries sorted by URI%s..." % (
                to_show, override_str))
        # Count from 1 so the loop stops right after the to_show-th entry.
        for shown, uri in enumerate(ri.resource_uris(), 1):
            print(ri.resources[uri])
            if shown >= to_show:
                break
    # 2. Get the inventory to compare against
    disk_inventory = self.inventory
    # 3. Calculate changeset; the count of unchanged resources is not used
    _num_same, updated, deleted, created = ri.compare(disk_inventory)
    changeset = Inventory()
    changeset.capabilities = capabilities
    changeset.add(disk_inventory.changeset(updated, changetype='updated'))
    # Deleted resources come from the reference inventory.
    changeset.add(ri.changeset(deleted, changetype='deleted'))
    changeset.add(disk_inventory.changeset(created, changetype='created'))
    # 4. Write out changeset
    s = Sitemap(verbose=self.verbose, pretty_xml=True,
                allow_multifile=self.allow_multifile, mapper=self.mapper)
    if self.max_sitemap_entries is not None:
        s.max_sitemap_entries = self.max_sitemap_entries
    if outfile is None:
        # print(...) with one argument gives the same output as the
        # Python 2 print statement and is also valid Python 3.
        print(s.inventory_as_xml(changeset))
    else:
        s.write(changeset, basename=outfile)
    self.write_dump_if_requested(changeset, dump)
def write_sitemap(self, outfile=None, capabilities=None, dump=None):
    """Serialize ``self.inventory`` as a sitemap.

    The XML is printed when ``outfile`` is None; otherwise the sitemap is
    written with ``outfile`` as basename. ``capabilities`` is assigned to
    the inventory before serialization, and ``dump`` is handed to
    ``self.write_dump_if_requested`` together with the inventory.
    """
    # Get the inventory and record the capabilities on it
    i = self.inventory
    i.capabilities = capabilities

    s = Sitemap(pretty_xml=True,
                allow_multifile=self.allow_multifile,
                mapper=self.mapper)
    if self.max_sitemap_entries is not None:
        s.max_sitemap_entries = self.max_sitemap_entries

    if outfile is None:
        # print(...) with one argument gives the same output as the
        # Python 2 print statement and is also valid Python 3.
        print(s.resources_as_xml(i, capabilities=i.capabilities))
    else:
        s.write(i, basename=outfile)
    self.write_dump_if_requested(i, dump)
def write_changeset(self):
    """Write the changeset returned by ``self.generate()`` to a file.

    The target is ``Source.STATIC_FILE_PATH`` joined with the name from
    ``self.current_changeset_file()``. After writing,
    ``self.previous_changeset_id`` is incremented by one and a log entry
    is emitted.

    NOTE(review): the original docstring also said this "empties the
    cache"; nothing in this method does so directly - presumably
    ``generate()`` takes care of it; confirm.
    """
    changeset = self.generate()
    basename = Source.STATIC_FILE_PATH + "/" + self.current_changeset_file()
    s = Sitemap()
    s.max_sitemap_entries = self.config['max_sitemap_entries']
    s.mapper = Mapper([self.source.base_uri, Source.STATIC_FILE_PATH])
    s.write(changeset, basename)
    # No metrics are collected here, so an empty dict is logged.
    log_data = {}
    self.previous_changeset_id += 1
    self.logger.info("Wrote static changeset. %s" % log_data)
def changeset_sitemap(self, outfile=None, ref_sitemap=None, capabilities=None,
                      dump=None):
    """Emit the changes between a reference sitemap and ``self.inventory``.

    Reads the reference sitemap from ``ref_sitemap`` and compares it with
    ``self.inventory``. The updated, deleted and created resources are
    gathered in a new ``Inventory``, which is printed as XML when
    ``outfile`` is None and otherwise written with ``outfile`` as
    basename. When ``self.verbose`` is set, progress messages and a
    listing of reference entries are printed as well; the listing stops
    after 100 entries, or after ``self.max_sitemap_entries`` when set.
    ``capabilities`` is stored on the changeset and ``dump`` is forwarded
    to ``self.write_dump_if_requested``.
    """
    verbose = self.verbose

    # 1. Get and parse reference sitemap
    rs = Sitemap(verbose=verbose, allow_multifile=self.allow_multifile,
                 mapper=self.mapper)
    if verbose:
        # One-element tuple so a tuple-valued ref_sitemap is formatted as
        # a single value instead of being unpacked as format arguments.
        print("Reading sitemap(s) from %s ..." % (ref_sitemap,))
    ri = rs.read(ref_sitemap)
    num_entries = len(ri)
    print("Read reference sitemap with %d entries in %d sitemaps"
          % (num_entries, rs.sitemaps_created))
    if verbose:
        if self.max_sitemap_entries:
            to_show = self.max_sitemap_entries
            override_str = ''
        else:
            to_show = 100
            override_str = ' (override with --max-sitemap-entries)'
        if num_entries > to_show:
            print("Showing first %d entries sorted by URI%s..."
                  % (to_show, override_str))
        # Count from 1 so the loop stops right after the to_show-th entry.
        for n, r in enumerate(ri.resource_uris(), 1):
            print(ri.resources[r])
            if n >= to_show:
                break

    # 2. Get the inventory to compare against
    disk_inventory = self.inventory

    # 3. Calculate changeset; the count of unchanged resources is not used
    _num_same, updated, deleted, created = ri.compare(disk_inventory)
    changeset = Inventory()
    changeset.capabilities = capabilities
    changeset.add(disk_inventory.changeset(updated, changetype='updated'))
    # Deleted resources come from the reference inventory.
    changeset.add(ri.changeset(deleted, changetype='deleted'))
    changeset.add(disk_inventory.changeset(created, changetype='created'))

    # 4. Write out changeset
    s = Sitemap(verbose=verbose, pretty_xml=True,
                allow_multifile=self.allow_multifile, mapper=self.mapper)
    if self.max_sitemap_entries is not None:
        s.max_sitemap_entries = self.max_sitemap_entries
    if outfile is None:
        # print(...) with one argument gives the same output as the
        # Python 2 print statement and is also valid Python 3.
        print(s.inventory_as_xml(changeset))
    else:
        s.write(changeset, basename=outfile)
    self.write_dump_if_requested(changeset, dump)
def changeset_sitemap(self, outfile=None, ref_sitemap=None,
                      newref_sitemap=None, empty=None, capabilities=None,
                      dump=None):
    """Build a ``ChangeSet`` from two inventories and emit it as a sitemap.

    When ``empty`` is falsy, the inventory read from ``ref_sitemap`` is
    compared with the one read from ``newref_sitemap``, or with
    ``self.inventory`` if ``newref_sitemap`` is None. Updated, deleted and
    created resources are recorded in the changeset. When ``empty`` is
    truthy the changeset is emitted without any resources.

    The XML is printed when ``outfile`` is None; otherwise it is written
    with ``outfile`` as basename. ``capabilities`` is stored on the
    changeset and ``dump`` is forwarded to
    ``self.write_dump_if_requested``.
    """
    changeset = ChangeSet()
    changeset.capabilities = capabilities

    if not empty:
        # 1. Get and parse reference sitemap
        old_inv = self.read_reference_sitemap(ref_sitemap)

        # 2. Pick the inventory to compare against: an explicit second
        #    sitemap if one was given, else the one held by this object
        if newref_sitemap is not None:
            new_inv = self.read_reference_sitemap(newref_sitemap,
                                                  name='new reference')
        else:
            new_inv = self.inventory

        # 3. Calculate changeset; unchanged resources are not recorded
        _same, updated, deleted, created = old_inv.compare(new_inv)
        changeset.add_changed_resources(updated, changetype='UPDATED')
        changeset.add_changed_resources(deleted, changetype='DELETED')
        changeset.add_changed_resources(created, changetype='CREATED')

    # 4. Write out changeset
    s = Sitemap(pretty_xml=True, allow_multifile=self.allow_multifile,
                mapper=self.mapper)
    if self.max_sitemap_entries is not None:
        s.max_sitemap_entries = self.max_sitemap_entries
    if outfile is None:
        # print(...) with one argument gives the same output as the
        # Python 2 print statement and is also valid Python 3.
        print(s.resources_as_xml(changeset, changeset=True))
    else:
        s.write(changeset, basename=outfile, changeset=True)
    self.write_dump_if_requested(changeset, dump)