def from_disk(self, path, url_prefix, inventory=None):
    """Create or extend an inventory with resources from a disk scan.

    Assumes a very simple disk path to URL mapping: the leading ``path``
    is chopped from each file name and replaced with ``url_prefix``
    (joined with a single '/').

    The tree under ``path`` is walked top-down. A file is skipped when
    ``self.exclude_file()`` is true for its name, when it is not a regular
    file, or when it is a symlink and ``self.include_symlinks`` is not set.
    Directories named in ``self.exclude_dirs`` are not descended into.
    A file that raises ``OSError`` while being examined is reported on
    stderr and skipped.

    Every resource gets ``lastmod`` from the file mtime; ``md5`` and
    ``size`` are added when ``self.do_md5`` / ``self.do_size`` are set.

    Returns the new or extended Inventory object. If an inventory is
    specified then items are added to that rather than creating a new one.

    Example:
        mb = InventoryBuilder()
        m = mb.from_disk('/path/to/files', 'http://example.org/path')
    """
    # Either use inventory passed in or make a new one
    if inventory is None:
        inventory = Inventory()
    # for each file: create Resource object and add it
    for dirpath, dirs, files in os.walk(path, topdown=True):
        for file_in_dirpath in files:
            # Build the full name before entering the try block so the
            # error handler below can always refer to it.
            filepath = os.path.join(dirpath, file_in_dirpath)
            try:
                if self.exclude_file(file_in_dirpath):
                    continue
                if not os.path.isfile(filepath):
                    continue
                if not self.include_symlinks and os.path.islink(filepath):
                    continue
                rel_path = os.path.relpath(filepath, start=path)
                if os.sep != '/':
                    # if directory path sep isn't / then translate for URI
                    rel_path = rel_path.replace(os.sep, '/')
                url = url_prefix + '/' + rel_path
                file_stat = os.stat(filepath)
            except OSError as e:
                # Newline terminated so consecutive reports stay separate
                sys.stderr.write("Ignoring file %s (error: %s)\n"
                                 % (filepath, str(e)))
                continue
            # fromtimestamp() without a tz yields a naive local-time value
            lastmod = datetime.fromtimestamp(file_stat.st_mtime).isoformat()
            r = Resource(uri=url, lastmod=lastmod)
            if self.do_md5:
                r.md5 = compute_md5_for_file(filepath)
            if self.do_size:
                r.size = file_stat.st_size
            inventory.add(r)
        # Prune dirs in place: with topdown=True os.walk only descends
        # into the names still present in this list.
        for exclude in self.exclude_dirs:
            if exclude in dirs:
                dirs.remove(exclude)
    return inventory
def from_disk(self, path, url_prefix, inventory=None):
    """Create or extend an inventory with resources from a disk scan.

    Assumes a very simple disk path to URL mapping: the leading ``path``
    is chopped from each file name and replaced with ``url_prefix``
    (joined with a single '/').

    The tree under ``path`` is walked top-down. A file is skipped when
    ``self.exclude_file()`` is true for its name, when it is not a regular
    file, or when it is a symlink and ``self.include_symlinks`` is not set.
    Directories named in ``self.exclude_dirs`` are not descended into.
    A file that raises ``OSError`` while being examined is reported on
    stderr and skipped.

    Every resource gets ``lastmod`` from the file mtime; ``md5`` and
    ``size`` are added when ``self.do_md5`` / ``self.do_size`` are set.

    Returns the new or extended Inventory object. If an inventory is
    specified then items are added to that rather than creating a new one.

    Example:
        mb = InventoryBuilder()
        m = mb.from_disk('/path/to/files', 'http://example.org/path')
    """
    # Either use inventory passed in or make a new one
    if inventory is None:
        inventory = Inventory()
    # for each file: create Resource object and add it
    for dirpath, dirs, files in os.walk(path, topdown=True):
        for file_in_dirpath in files:
            # Computed ahead of the try block: the OSError handler needs
            # the name even when the failure happens on the first call.
            full_name = os.path.join(dirpath, file_in_dirpath)
            try:
                if self.exclude_file(file_in_dirpath):
                    continue
                if not os.path.isfile(full_name):
                    continue
                if not self.include_symlinks and os.path.islink(full_name):
                    continue
                rel_path = os.path.relpath(full_name, start=path)
                if os.sep != '/':
                    # if directory path sep isn't / then translate for URI
                    rel_path = rel_path.replace(os.sep, '/')
                url = url_prefix + '/' + rel_path
                file_stat = os.stat(full_name)
            except OSError as e:
                # Newline terminated so consecutive reports stay separate
                sys.stderr.write("Ignoring file %s (error: %s)\n"
                                 % (full_name, str(e)))
                continue
            # fromtimestamp() without a tz yields a naive local-time value
            lastmod = datetime.fromtimestamp(file_stat.st_mtime).isoformat()
            r = Resource(uri=url, lastmod=lastmod)
            if self.do_md5:
                r.md5 = compute_md5_for_file(full_name)
            if self.do_size:
                r.size = file_stat.st_size
            inventory.add(r)
        # Prune dirs in place: with topdown=True os.walk only descends
        # into the names still present in this list.
        for exclude in self.exclude_dirs:
            if exclude in dirs:
                dirs.remove(exclude)
    return inventory
def from_disk_add_map(self, inventory=None, map=None):
    """Add resources found under ``map.dst_path`` to ``inventory``.

    The tree under ``map.dst_path`` is walked top-down. A file is skipped
    when ``self.exclude_file()`` is true for its name, when it is not a
    regular file, or when it is a symlink and ``self.include_symlinks`` is
    not set. Directories named in ``self.exclude_dirs`` are not descended
    into. A file that raises ``OSError`` while being examined is reported
    on stderr and skipped.

    The URI of each resource comes from ``map.dst_to_src()`` and its
    ``timestamp`` is the file's ``st_mtime``; ``md5`` and ``size`` are
    added when ``self.do_md5`` / ``self.do_size`` are set.

    Both arguments are required despite their ``None`` defaults.

    Returns the inventory that was passed in.

    Raises:
        ValueError: if ``inventory`` or ``map`` is None.
        Exception: if ``map.dst_to_src()`` returns None for a file.
    """
    # sanity
    if inventory is None or map is None:
        raise ValueError("Must specify inventory and map")
    path = map.dst_path
    # for each file: create ResourceFile object, add, increment counter
    num_files = 0
    for dirpath, dirs, files in os.walk(path, topdown=True):
        for file_in_dirpath in files:
            # Counts every file seen, including ones excluded below
            num_files += 1
            if num_files % 50000 == 0:
                self.logger.info(
                    "InventoryBuilder.from_disk_add_map: %d files...",
                    num_files)
            # Build the full name before entering the try block so the
            # error handler below can always refer to it.
            filepath = os.path.join(dirpath, file_in_dirpath)
            try:
                if self.exclude_file(file_in_dirpath):
                    self.logger.debug("Excluding file %s", file_in_dirpath)
                    continue
                if not os.path.isfile(filepath):
                    continue
                if not self.include_symlinks and os.path.islink(filepath):
                    continue
                uri = map.dst_to_src(filepath)
                if uri is None:
                    # Not an OSError, so this propagates to the caller
                    raise Exception("Internal error, mapping failed")
                file_stat = os.stat(filepath)
            except OSError as e:
                # Newline terminated so consecutive reports stay separate
                sys.stderr.write("Ignoring file %s (error: %s)\n"
                                 % (filepath, str(e)))
                continue
            timestamp = file_stat.st_mtime  # UTC
            r = ResourceFile(uri=uri, timestamp=timestamp, file=filepath)
            if self.do_md5:
                r.md5 = compute_md5_for_file(filepath)
            if self.do_size:
                r.size = file_stat.st_size
            inventory.add(r)
        # Prune dirs in place: with topdown=True os.walk only descends
        # into the names still present in this list.
        for exclude in self.exclude_dirs:
            if exclude in dirs:
                self.logger.debug("Excluding dir %s", exclude)
                dirs.remove(exclude)
    return inventory
def from_disk_add_map(self, inventory=None, map=None):
    """Add resources found under ``map.dst_path`` to ``inventory``.

    The tree under ``map.dst_path`` is walked top-down. A file is skipped
    when ``self.exclude_file()`` is true for its name, when it is not a
    regular file, or when it is a symlink and ``self.include_symlinks`` is
    not set. Directories named in ``self.exclude_dirs`` are not descended
    into. A file that raises ``OSError`` while being examined is reported
    on stderr and skipped.

    The URI of each resource comes from ``map.dst_to_src()`` and its
    ``lastmod`` from the file mtime; ``md5`` and ``size`` are added when
    ``self.do_md5`` / ``self.do_size`` are set. When ``self.verbose`` is
    set a progress line is printed every 50000 files.

    Both arguments are required despite their ``None`` defaults.

    Returns the inventory that was passed in.

    Raises:
        ValueError: if ``inventory`` or ``map`` is None.
        Exception: if ``map.dst_to_src()`` returns None for a file.
    """
    # sanity
    if inventory is None or map is None:
        raise ValueError("Must specify inventory and map")
    path = map.dst_path
    # for each file: create ResourceFile object, add, increment counter
    num_files = 0
    for dirpath, dirs, files in os.walk(path, topdown=True):
        for file_in_dirpath in files:
            # Counts every file seen, including ones excluded below
            num_files += 1
            if (num_files % 50000 == 0) and self.verbose:
                # Parenthesised single argument: same output whether print
                # is the Python 2 statement or the Python 3 function.
                print("InventoryBuilder.from_disk_add_map: %d files..."
                      % (num_files))
            # Build the full name before entering the try block so the
            # error handler below can always refer to it.
            filepath = os.path.join(dirpath, file_in_dirpath)
            try:
                if self.exclude_file(file_in_dirpath):
                    continue
                if not os.path.isfile(filepath):
                    continue
                if not self.include_symlinks and os.path.islink(filepath):
                    continue
                uri = map.dst_to_src(filepath)
                if uri is None:
                    # Not an OSError, so this propagates to the caller
                    raise Exception("Internal error, mapping failed")
                file_stat = os.stat(filepath)
            except OSError as e:
                # Newline terminated so consecutive reports stay separate
                sys.stderr.write("Ignoring file %s (error: %s)\n"
                                 % (filepath, str(e)))
                continue
            # fromtimestamp() without a tz yields a naive local-time value
            lastmod = datetime.fromtimestamp(file_stat.st_mtime).isoformat()
            r = ResourceFile(uri=uri, lastmod=lastmod, file=filepath)
            if self.do_md5:
                r.md5 = compute_md5_for_file(filepath)
            if self.do_size:
                r.size = file_stat.st_size
            inventory.add(r)
        # Prune dirs in place: with topdown=True os.walk only descends
        # into the names still present in this list.
        for exclude in self.exclude_dirs:
            if exclude in dirs:
                dirs.remove(exclude)
    return inventory