def from_disk(self, path, url_prefix, inventory=None):
        """Create or extend an inventory with resources from a disk scan.

        Walks the directory tree rooted at ``path`` and adds one Resource
        per regular file found. Assumes a very simple disk path to URL
        mapping: the URI of each resource is ``url_prefix``, a '/', and
        the file's path relative to ``path`` (with the OS path separator
        translated to '/').

        Files for which ``self.exclude_file(name)`` is true are skipped,
        symlinks are skipped unless ``self.include_symlinks`` is set, and
        directories named in ``self.exclude_dirs`` are not descended into.
        Files that raise OSError while being examined are reported on
        stderr and ignored.

        Each resource gets ``lastmod`` set from the file's mtime; ``md5``
        and ``size`` are added when ``self.do_md5`` / ``self.do_size``
        are set.

        If an inventory is specified then items are added to that rather
        than creating a new one. Returns the new or extended Inventory
        object.

        Example:

            mb = InventoryBuilder()
            m = mb.from_disk('/path/to/files', 'http://example.org/path')
        """
        # Either use inventory passed in or make a new one
        if inventory is None:
            inventory = Inventory()
        for dirpath, dirs, files in os.walk(path, topdown=True):
            for name in files:
                # Build the full path before entering the try block so the
                # error handler can always name the file it is skipping.
                filepath = os.path.join(dirpath, name)
                try:
                    if self.exclude_file(name):
                        continue
                    if not os.path.isfile(filepath):
                        continue
                    if (not self.include_symlinks
                            and os.path.islink(filepath)):
                        continue
                    rel_path = os.path.relpath(filepath, start=path)
                    if os.sep != '/':
                        # if directory path sep isn't / then translate for URI
                        rel_path = rel_path.replace(os.sep, '/')
                    url = url_prefix + '/' + rel_path
                    file_stat = os.stat(filepath)
                except OSError as e:
                    # Best effort: report and move on. The newline keeps
                    # successive messages from running together.
                    sys.stderr.write("Ignoring file %s (error: %s)\n" %
                                     (filepath, str(e)))
                    continue
                # datetime.fromtimestamp() without a tz yields naive local time
                lastmod = datetime.fromtimestamp(file_stat.st_mtime).isoformat()
                r = Resource(uri=url, lastmod=lastmod)
                if self.do_md5:
                    r.md5 = compute_md5_for_file(filepath)
                if self.do_size:
                    r.size = file_stat.st_size
                inventory.add(r)
            # Prune dirs in place so that os.walk (topdown) does not descend
            # into any directory listed in self.exclude_dirs
            dirs[:] = [d for d in dirs if d not in self.exclude_dirs]
        return inventory
    def from_disk(self, path, url_prefix, inventory=None):
        """Create or extend an inventory with resources from a disk scan.

        Assumes a very simple disk path to URL mapping: the leading
        ``path`` of every file found is chopped off and replaced with
        ``url_prefix`` (joined with '/', and with the OS path separator
        translated to '/').

        Skipped entries: files for which ``self.exclude_file(name)`` is
        true, anything that is not a regular file, symlinks unless
        ``self.include_symlinks`` is set, and everything below a directory
        named in ``self.exclude_dirs``. A file that raises OSError while
        being examined is reported on stderr and ignored.

        Every Resource added carries ``lastmod`` (from the file mtime),
        plus ``md5`` when ``self.do_md5`` is set and ``size`` when
        ``self.do_size`` is set.

        If an inventory is specified then items are added to that rather
        than creating a new one. Returns the new or extended Inventory
        object.

        Example:

            mb = InventoryBuilder()
            m = mb.from_disk('/path/to/files', 'http://example.org/path')
        """
        # Either use inventory passed in or make a new one
        if inventory is None:
            inventory = Inventory()
        # for each file: create Resource object and add it
        for dirpath, dirs, files in os.walk(path, topdown=True):
            for file_in_dirpath in files:
                # Full filename is computed up front so that it is always
                # defined when the OSError handler needs to report it.
                filename = os.path.join(dirpath, file_in_dirpath)
                try:
                    if self.exclude_file(file_in_dirpath):
                        continue
                    if not os.path.isfile(filename):
                        continue
                    if (not self.include_symlinks
                            and os.path.islink(filename)):
                        continue
                    rel_path = os.path.relpath(filename, start=path)
                    if os.sep != '/':
                        # if directory path sep isn't / then translate for URI
                        rel_path = rel_path.replace(os.sep, '/')
                    url = url_prefix + '/' + rel_path
                    file_stat = os.stat(filename)
                except OSError as e:
                    # Newline-terminated so consecutive reports stay readable
                    sys.stderr.write("Ignoring file %s (error: %s)\n" %
                                     (filename, str(e)))
                    continue
                mtime = file_stat.st_mtime
                # naive local-time ISO 8601 string (no tz passed)
                lastmod = datetime.fromtimestamp(mtime).isoformat()
                r = Resource(uri=url, lastmod=lastmod)
                if self.do_md5:
                    r.md5 = compute_md5_for_file(filename)
                if self.do_size:
                    r.size = file_stat.st_size
                inventory.add(r)
            # prune list of dirs in place, based on self.exclude_dirs, so
            # that the top-down walk never enters them
            dirs[:] = [d for d in dirs if d not in self.exclude_dirs]
        return inventory
Example #3
0
    def from_disk_add_map(self, inventory=None, map=None):
        """Add resources found under a mapping's destination path.

        Walks the tree rooted at ``map.dst_path`` and, for every regular
        file, adds a ResourceFile to ``inventory`` whose URI is obtained
        from ``map.dst_to_src(file)`` and whose timestamp is the file's
        mtime. ``md5`` and ``size`` are added when ``self.do_md5`` /
        ``self.do_size`` are set.

        Files for which ``self.exclude_file(name)`` is true are skipped,
        symlinks are skipped unless ``self.include_symlinks`` is set, and
        directories named in ``self.exclude_dirs`` are not descended into.
        Files that raise OSError while being examined are reported on
        stderr and ignored. Progress is logged every 50000 files seen
        (counted before any exclusion is applied).

        Returns the inventory that was passed in.

        Raises ValueError if either ``inventory`` or ``map`` is None, and
        Exception if the mapping returns None for a file.
        """
        # sanity
        if inventory is None or map is None:
            raise ValueError("Must specify inventory and map")
        path = map.dst_path
        # for each file: create ResourceFile object, add, increment counter
        num_files = 0
        for dirpath, dirs, files in os.walk(path, topdown=True):
            for file_in_dirpath in files:
                num_files += 1
                if num_files % 50000 == 0:
                    self.logger.info("InventoryBuilder.from_disk_add_map: %d files..." % (num_files))
                # Build the full path before the try block so the OSError
                # handler can always name the file it is skipping.
                filepath = os.path.join(dirpath, file_in_dirpath)
                try:
                    if self.exclude_file(file_in_dirpath):
                        self.logger.debug("Excluding file %s" % (file_in_dirpath))
                        continue
                    if not os.path.isfile(filepath):
                        continue
                    if (not self.include_symlinks
                            and os.path.islink(filepath)):
                        continue
                    uri = map.dst_to_src(filepath)
                    if uri is None:
                        raise Exception("Internal error, mapping failed")
                    file_stat = os.stat(filepath)
                except OSError as e:
                    # Newline-terminated so consecutive reports stay readable
                    sys.stderr.write("Ignoring file %s (error: %s)\n" %
                                     (filepath, str(e)))
                    continue
                timestamp = file_stat.st_mtime  # UTC
                r = ResourceFile(uri=uri, timestamp=timestamp, file=filepath)
                if self.do_md5:
                    r.md5 = compute_md5_for_file(filepath)
                if self.do_size:
                    r.size = file_stat.st_size
                inventory.add(r)
            # prune list of dirs based on self.exclude_dirs; removing in
            # place stops the top-down walk from descending into them
            for exclude in self.exclude_dirs:
                if exclude in dirs:
                    self.logger.debug("Excluding dir %s" % (exclude))
                    dirs.remove(exclude)
        return inventory
Example #4
0
    def from_disk_add_map(self, inventory=None, map=None):
        """Add resources found under a mapping's destination path.

        Walks the tree rooted at ``map.dst_path`` and, for every regular
        file, adds a ResourceFile to ``inventory`` whose URI is obtained
        from ``map.dst_to_src(file)`` and whose ``lastmod`` is derived
        from the file's mtime. ``md5`` and ``size`` are added when
        ``self.do_md5`` / ``self.do_size`` are set.

        Files for which ``self.exclude_file(name)`` is true are skipped,
        symlinks are skipped unless ``self.include_symlinks`` is set, and
        directories named in ``self.exclude_dirs`` are not descended into.
        Files that raise OSError while being examined are reported on
        stderr and ignored. When ``self.verbose`` is set, a progress line
        is printed every 50000 files seen (counted before exclusions).

        Returns the inventory that was passed in.

        Raises ValueError if either ``inventory`` or ``map`` is None, and
        Exception if the mapping returns None for a file.
        """
        # sanity
        if inventory is None or map is None:
            raise ValueError("Must specify inventory and map")
        path = map.dst_path
        # for each file: create ResourceFile object, add, increment counter
        num_files = 0
        for dirpath, dirs, files in os.walk(path, topdown=True):
            for file_in_dirpath in files:
                num_files += 1
                if (num_files % 50000 == 0) and self.verbose:
                    # Parenthesised single-argument form prints the same
                    # text under both Python 2 and Python 3.
                    print("InventoryBuilder.from_disk_add_map: %d files..." % (num_files))
                # Build the full path before the try block so the OSError
                # handler can always name the file it is skipping.
                filepath = os.path.join(dirpath, file_in_dirpath)
                try:
                    if self.exclude_file(file_in_dirpath):
                        continue
                    if not os.path.isfile(filepath):
                        continue
                    if (not self.include_symlinks
                            and os.path.islink(filepath)):
                        continue
                    uri = map.dst_to_src(filepath)
                    if uri is None:
                        raise Exception("Internal error, mapping failed")
                    file_stat = os.stat(filepath)
                except OSError as e:
                    # Newline-terminated so consecutive reports stay readable
                    sys.stderr.write("Ignoring file %s (error: %s)\n" %
                                     (filepath, str(e)))
                    continue
                mtime = file_stat.st_mtime
                # NOTE(review): fromtimestamp() without a tz gives naive
                # local time - confirm that is what consumers expect.
                lastmod = datetime.fromtimestamp(mtime).isoformat()
                r = ResourceFile(uri=uri, lastmod=lastmod, file=filepath)
                if self.do_md5:
                    r.md5 = compute_md5_for_file(filepath)
                if self.do_size:
                    r.size = file_stat.st_size
                inventory.add(r)
            # prune list of dirs in place, based on self.exclude_dirs, so
            # that the top-down walk never enters them
            dirs[:] = [d for d in dirs if d not in self.exclude_dirs]
        return inventory