def _unchanged(pathname, content):
    """
    Return `True` if a file is unchanged from its packaged version.

    `pathname` is the full path of the file and `content` is the content
    the caller already read from it.  The file is considered unchanged
    when it belongs to the `base-files` package, or when a digest of
    `content` appears among the known checksums for `pathname` and
    `ignore.file(pathname, True)` also says it may be ignored.
    """

    # Ignore files that are from the `base-files` package (which
    # doesn't include MD5 sums for every file for some reason).
    apt_packages = _dpkg_query_S(pathname)
    if 'base-files' in apt_packages:
        return True

    # Collect every known checksum for this pathname.  The `MD5SUMS`
    # entry is copied first so that the checksums appended below are not
    # written back into the module-level table on every call.
    md5sums = list(MD5SUMS.get(pathname, []))
    md5sums.extend(
        [_dpkg_md5sum(package, pathname) for package in apt_packages])
    md5sum = _rpm_md5sum(pathname)
    if md5sum is not None:
        md5sums.append(md5sum)

    # Ignore files that are unchanged from their packaged version,
    # or match in MD5SUMS.  The SHA-256 digest is only computed when at
    # least one recorded checksum is 64 characters long.
    matches = hashlib.md5(content).hexdigest() in md5sums
    if not matches and 64 in [len(known or '') for known in md5sums]:
        matches = hashlib.sha256(content).hexdigest() in md5sums
    return bool(matches and ignore.file(pathname, True))
def _unchanged(pathname, content):
    """
    Return `True` if a file is unchanged from its packaged version.

    `pathname` is the full path of the file and `content` is the content
    the caller already read from it.  The file is considered unchanged
    when it belongs to the `base-files` package, or when a digest of
    `content` appears among the known checksums for `pathname` and
    `ignore.file(pathname, True)` also says it may be ignored.
    """

    # Ignore files that are from the `base-files` package (which
    # doesn't include MD5 sums for every file for some reason).
    apt_packages = _dpkg_query_S(pathname)
    if 'base-files' in apt_packages:
        return True

    # Collect every known checksum for this pathname.  The `MD5SUMS`
    # entry is copied first so that the checksums appended below are not
    # written back into the module-level table on every call.
    md5sums = list(MD5SUMS.get(pathname, []))
    md5sums.extend(
        [_dpkg_md5sum(package, pathname) for package in apt_packages])
    md5sum = _rpm_md5sum(pathname)
    if md5sum is not None:
        md5sums.append(md5sum)

    # Ignore files that are unchanged from their packaged version,
    # or match in MD5SUMS.  The SHA-256 digest is only computed when at
    # least one recorded checksum is 64 characters long.
    matches = hashlib.md5(content).hexdigest() in md5sums
    if not matches and 64 in [len(known or '') for known in md5sums]:
        matches = hashlib.sha256(content).hexdigest() in md5sums
    return bool(matches and ignore.file(pathname, True))
def files(b):
    """
    Search `/etc` for configuration files and record them in `b.files`.

    `b` must expose a `files` mapping.  Each file that survives the
    filters below is stored under its pathname as a dictionary with the
    keys `content`, `encoding`, `group`, `mode` and `owner`.
    """
    logging.info("searching for configuration files")

    # Visit every file in `/etc` except those on the exclusion list above.
    for dirpath, dirnames, filenames in os.walk("/etc"):

        # Determine if this entire directory should be ignored by default.
        ignored = ignore.file(dirpath)

        # Collect up the full pathname to each file, `lstat` them all, and
        # note which ones will probably be ignored.
        entries = []
        for filename in filenames:
            pathname = os.path.join(dirpath, filename)
            try:
                entries.append((pathname,
                                os.lstat(pathname),
                                ignore.file(pathname, ignored)))
            except OSError as e:
                logging.warning("{0} caused {1} - try running as root".format(pathname, errno.errorcode[e.errno]))

        # Track the ctime of each file in this directory.  Weed out false
        # positives by ignoring files with common ctimes.
        ctimes = defaultdict(int)

        # Map the ctimes of each directory entry that isn't being ignored.
        for pathname, s, file_ignored in entries:
            if not file_ignored:
                ctimes[s.st_ctime] += 1
        for dirname in dirnames:
            try:
                ctimes[os.lstat(os.path.join(dirpath, dirname)).st_ctime] += 1
            except OSError:
                pass

        for pathname, s, file_ignored in entries:

            # Ignore ignored files and files that share their ctime with other
            # files in the directory.  This is a very strong indication that
            # the file is original to the system and should be ignored.
            if file_ignored or 1 < ctimes[s.st_ctime]:
                continue

            # The content is used even for symbolic links to determine whether
            # it has changed from the packaged version.  Unreadable files are
            # skipped silently.
            try:
                with open(pathname) as f:
                    content = f.read()
            except IOError:
                continue

            # Ignore files that are from the `base-files` package (which
            # doesn't include MD5 sums for every file for some reason),
            # are unchanged from their packaged version, or match in `MD5SUMS`.
            packages = _dpkg_query_S(pathname) + _rpm_qf(pathname)
            if "base-files" in packages:
                continue
            if 0 < len(packages):
                md5sums = [_dpkg_md5sum(package, pathname)
                           for package in packages]
                # TODO Equivalent checksumming for RPMs.
            elif pathname in MD5SUMS:
                # Entries beginning with "/" are pathnames of reference
                # files.  Each is replaced, in the `MD5SUMS` list itself,
                # by the MD5 sum of that file's content once it can be read.
                md5sums = MD5SUMS[pathname]
                for i, entry in enumerate(md5sums):
                    if "/" != entry[0]:
                        continue
                    try:
                        with open(entry) as f:
                            md5sums[i] = hashlib.md5(f.read()).hexdigest()
                    except IOError:
                        pass
            else:
                md5sums = []
            if 0 < len(md5sums) \
                    and hashlib.md5(content).hexdigest() in md5sums \
                    and ignore.file(pathname, True):
                continue
            if True in [_rpm_V(package, pathname) and ignore.file(pathname, True) for package in packages]:
                continue

            # A symbolic link's content is the link target.
            if stat.S_ISLNK(s.st_mode):
                content = os.readlink(pathname)

                # Ignore symbolic links providing backwards compatibility
                # between SystemV init and Upstart.
                if "/lib/init/upstart-job" == content:
                    continue

                # Ignore symbolic links into the Debian alternatives system.
                # These are almost certainly managed by packages.
                if content.startswith("/etc/alternatives/"):
                    continue

                encoding = "plain"

            # A regular file is stored as plain text only if it is valid
            # UTF-8, which is required for JSON serialization.
            elif stat.S_ISREG(s.st_mode):
                try:
                    content = content.decode("UTF-8")
                    encoding = "plain"
                except UnicodeDecodeError:
                    content = base64.b64encode(content)
                    encoding = "base64"

            # Other types, like FIFOs and sockets are not supported within
            # a blueprint and really shouldn't appear in `/etc` at all.
            else:
                logging.warning("{0} is not a regular file or symbolic link".format(pathname))
                continue

            # Fall back to the numeric IDs when the owner or group has no
            # entry in the password or group database.
            try:
                owner = pwd.getpwuid(s.st_uid).pw_name
            except KeyError:
                owner = s.st_uid
            try:
                group = grp.getgrgid(s.st_gid).gr_name
            except KeyError:
                group = s.st_gid

            b.files[pathname] = dict(
                content=content,
                encoding=encoding,
                group=group,
                mode="{0:o}".format(s.st_mode),
                owner=owner
            )
def _source(b, dirname):
    """
    Build a tarball of `dirname` and record it in `b.sources`.

    A partial shallow copy of `dirname` is created under the current
    working directory using hard links, files on the exclusion lists are
    removed from it, and what remains is archived into a tarball named by
    the SHA1 sum of the archive.  `b.sources[dirname]` is set to that
    tarball's name.

    Returns `None` in every case.  The function gives up early, after
    logging a warning, when a directory's ownership or mode cannot be
    copied or a file cannot be hard linked, and silently when adding the
    shallow copy to the archive raises `OSError`.
    """
    # `dirname[1:]` drops the first character, presumably the leading
    # slash of an absolute path -- TODO confirm against the caller.
    tmpname = os.path.join(os.getcwd(), dirname[1:].replace('/', '-'))

    exclude = []

    pattern_pip = re.compile(r'\.egg-info/installed-files.txt$')
    pattern_egg = re.compile(r'\.egg(?:-info)?(?:/|$)')
    pattern_pth = re.compile(
        r'lib/python[^/]+/(?:dist|site)-packages/easy-install.pth$')
    pattern_bin = re.compile(r'EASY-INSTALL(?:-ENTRY)?-SCRIPT')

    # Create a partial shallow copy of the directory.
    for dirpath, dirnames, filenames in os.walk(dirname):

        # Determine if this entire directory should be ignored by default.
        ignored = ignore.file(dirpath)

        dirpath2 = os.path.normpath(
            os.path.join(tmpname, os.path.relpath(dirpath, dirname)))

        # Create this directory in the shallow copy with matching mode, owner,
        # and owning group.  Suggest running as `root` if this doesn't work.
        os.mkdir(dirpath2)
        s = os.lstat(dirpath)
        try:
            try:
                os.lchown(dirpath2, s.st_uid, s.st_gid)
            except OverflowError:
                logging.warning(
                    '{0} has uid:gid {1}:{2} - using chown(1)'.format(
                        dirpath, s.st_uid, s.st_gid))
                p = subprocess.Popen(
                    ['chown', '{0}:{1}'.format(s.st_uid, s.st_gid), dirpath2],
                    close_fds=True)
                p.communicate()
            os.chmod(dirpath2, s.st_mode)
        except OSError as e:
            logging.warning('{0} caused {1} - try running as root'.format(
                dirpath, errno.errorcode[e.errno]))
            return

        for filename in filenames:
            pathname = os.path.join(dirpath, filename)

            if ignore.source(pathname, ignored):
                continue

            pathname2 = os.path.join(dirpath2, filename)

            # Exclude files that are part of the RubyGems package.
            # NOTE(review): this check used to `continue` an inner loop over
            # the two globs, which never skipped the file; it now skips it
            # as the comment above describes.
            rubygems_globs = (
                os.path.join(
                    '/usr/lib/ruby/gems/*/gems/rubygems-update-*/lib',
                    pathname[1:]),
                os.path.join(
                    '/var/lib/gems/*/gems/rubygems-update-*/lib',
                    pathname[1:]))
            if any(glob.glob(globname) for globname in rubygems_globs):
                continue

            # Remember the path to all of `pip`'s `installed_files.txt` files.
            if pattern_pip.search(pathname):
                with open(pathname) as f:
                    exclude.extend([os.path.join(dirpath2, line.rstrip())
                                    for line in f])

            # Likewise remember the path to Python eggs.
            if pattern_egg.search(pathname):
                exclude.append(pathname2)

            # Exclude `easy_install`'s bookkeeping file, too.
            if pattern_pth.search(pathname):
                continue

            # Exclude executable placed by Python packages.  A file that
            # cannot be read is kept rather than aborting the whole run.
            if pathname.startswith('/usr/local/bin/'):
                try:
                    with open(pathname) as f:
                        if pattern_bin.search(f.read()):
                            continue
                except IOError:
                    pass

            # Exclude share/applications/mimeinfo.cache, whatever that is.
            if '/usr/local/share/applications/mimeinfo.cache' == pathname:
                continue

            # Hard link this file into the shallow copy.  Suggest running as
            # `root` if this doesn't work though in practice the check above
            # will have already caught this problem.
            try:
                os.link(pathname, pathname2)
            except OSError as e:
                logging.warning('{0} caused {1} - try running as root'.format(
                    pathname, errno.errorcode[e.errno]))
                return

    # Unlink files that were remembered for exclusion above.  Directories
    # and paths that were never linked are expected and skipped.
    for pathname in exclude:
        try:
            os.unlink(pathname)
        except OSError as e:
            if e.errno not in (errno.EISDIR, errno.ENOENT):
                raise

    # Clean up dangling symbolic links.  This makes the assumption that
    # no one intends to leave dangling symbolic links hanging around,
    # which I think is a good assumption.
    for dirpath, dirnames, filenames in os.walk(tmpname):
        for filename in filenames:
            pathname = os.path.join(dirpath, filename)
            if stat.S_ISLNK(os.lstat(pathname).st_mode):
                try:
                    os.stat(pathname)
                except OSError as e:
                    if errno.ENOENT == e.errno:
                        os.unlink(pathname)

    # Remove empty directories.  For any that hang around, match their
    # access and modification times to the source, otherwise the hash of
    # the tarball will not be deterministic.  The times are read from the
    # corresponding source directory rather than from whatever was
    # `lstat`ed last.
    for dirpath, dirnames, filenames in os.walk(tmpname, topdown=False):
        try:
            os.rmdir(dirpath)
        except OSError:
            s = os.lstat(
                os.path.join(dirname, os.path.relpath(dirpath, tmpname)))
            os.utime(dirpath, (s.st_atime, s.st_mtime))

    # If the shallow copy still exists, create a tarball named by its
    # SHA1 sum and include it in the blueprint.  The archive is opened
    # outside the `try` so that `tar` is always bound when it is closed.
    tar = tarfile.open('tmp.tar', 'w')
    try:
        tar.add(tmpname, '.')
    except OSError:
        return
    finally:
        tar.close()
    sha1 = hashlib.sha1()
    with open('tmp.tar', 'rb') as f:
        for buf in iter(lambda: f.read(4096), b''):
            sha1.update(buf)
    tarname = '{0}.tar'.format(sha1.hexdigest())
    os.rename('tmp.tar', tarname)
    b.sources[dirname] = tarname
def files(b):
    """
    Search `/etc` for configuration files and add them to the blueprint.

    `b` must provide `add_file`, `add_service` and `add_service_package`.
    Every file that survives the filters below is passed to `b.add_file`;
    when `util.parse_service` recognizes its pathname, the service and the
    packages that own the file are added as well.
    """
    logging.info('searching for configuration files')

    # Visit every file in `/etc` except those on the exclusion list above.
    for dirpath, dirnames, filenames in os.walk('/etc'):

        # Determine if this entire directory should be ignored by default.
        ignored = ignore.file(dirpath)

        # Collect up the full pathname to each file, `lstat` them all, and
        # note which ones will probably be ignored.
        entries = []
        for filename in filenames:
            pathname = os.path.join(dirpath, filename)
            try:
                entries.append((pathname,
                                os.lstat(pathname),
                                ignore.file(pathname, ignored)))
            except OSError as e:
                logging.warning('{0} caused {1} - try running as root'.format(
                    pathname, errno.errorcode[e.errno]))

        # Track the ctime of each file in this directory.  Weed out false
        # positives by ignoring files with common ctimes.
        ctimes = defaultdict(int)

        # Map the ctimes of each directory entry that isn't being ignored.
        for pathname, s, file_ignored in entries:
            if not file_ignored:
                ctimes[s.st_ctime] += 1
        for dirname in dirnames:
            try:
                ctimes[os.lstat(os.path.join(dirpath, dirname)).st_ctime] += 1
            except OSError:
                pass

        for pathname, s, file_ignored in entries:

            # Always ignore block special files, character special files,
            # pipes, and sockets.  They end up looking like deadlocks.
            if stat.S_ISBLK(s.st_mode) \
                    or stat.S_ISCHR(s.st_mode) \
                    or stat.S_ISFIFO(s.st_mode) \
                    or stat.S_ISSOCK(s.st_mode):
                continue

            # Make sure this pathname will actually be able to be included
            # in the blueprint.  This is a bit of a cop-out since the file
            # could be important but at least it's not a crashing bug.
            try:
                pathname = unicode(pathname)
            except UnicodeDecodeError:
                logging.warning('{0} not UTF-8 - skipping it'.format(
                    repr(pathname)[1:-1]))
                continue

            # Ignore ignored files and files that share their ctime with other
            # files in the directory.  This is a very strong indication that
            # the file is original to the system and should be ignored.
            # `and` binds tighter than `or`: an ignored file is always
            # skipped, while a shared ctime only skips the file when
            # `ignore.file(pathname, True)` agrees.
            if file_ignored or (1 < ctimes[s.st_ctime]
                                and ignore.file(pathname, True)):
                continue

            # Check for a Mustache template and an optional shell script
            # that templatize this file.
            try:
                with open(
                        '{0}.blueprint-template.mustache'.format(pathname)) as f:
                    template = f.read()
            except IOError:
                template = None
            try:
                with open('{0}.blueprint-template.sh'.format(pathname)) as f:
                    data = f.read()
            except IOError:
                data = None

            # The content is used even for symbolic links to determine whether
            # it has changed from the packaged version.  Unreadable files are
            # skipped silently.
            try:
                with open(pathname) as f:
                    content = f.read()
            except IOError:
                continue

            # Ignore files that are unchanged from their packaged version.
            if _unchanged(pathname, content):
                continue

            # Resolve the rest of the file's metadata from the
            # `/etc/passwd` and `/etc/group` databases, falling back to the
            # numeric IDs when there is no matching entry.
            try:
                owner = pwd.getpwuid(s.st_uid).pw_name
            except KeyError:
                owner = s.st_uid
            try:
                group = grp.getgrgid(s.st_gid).gr_name
            except KeyError:
                group = s.st_gid
            mode = '{0:o}'.format(s.st_mode)

            # A symbolic link's content is the link target.
            if stat.S_ISLNK(s.st_mode):
                content = os.readlink(pathname)

                # Ignore symbolic links providing backwards compatibility
                # between SystemV init and Upstart.
                if '/lib/init/upstart-job' == content:
                    continue

                # Ignore symbolic links into the Debian alternatives system.
                # These are almost certainly managed by packages.
                if content.startswith('/etc/alternatives/'):
                    continue

                b.add_file(pathname,
                           content=content,
                           encoding='plain',
                           group=group,
                           mode=mode,
                           owner=owner)

            # A regular file is stored as plain text only if it is valid
            # UTF-8, which is required for JSON serialization.  When a
            # template exists it is stored (with its optional data script)
            # in place of the file's content.
            else:
                kwargs = dict(group=group, mode=mode, owner=owner)
                try:
                    if template:
                        if data:
                            kwargs['data'] = data.decode('utf_8')
                        kwargs['template'] = template.decode('utf_8')
                    else:
                        kwargs['content'] = content.decode('utf_8')
                    kwargs['encoding'] = 'plain'
                except UnicodeDecodeError:
                    if template:
                        if data:
                            kwargs['data'] = base64.b64encode(data)
                        kwargs['template'] = base64.b64encode(template)
                    else:
                        kwargs['content'] = base64.b64encode(content)
                    kwargs['encoding'] = 'base64'
                b.add_file(pathname, **kwargs)

            # If this file is a service init script or config, create a
            # service resource.  `ValueError` is how a pathname that is not
            # a service is reported here.
            try:
                manager, service = util.parse_service(pathname)
                if not ignore.service(manager, service):
                    b.add_service(manager, service)
                    b.add_service_package(manager,
                                          service,
                                          'apt',
                                          *_dpkg_query_S(pathname))
                    b.add_service_package(manager,
                                          service,
                                          'yum',
                                          *_rpm_qf(pathname))
            except ValueError:
                pass
def files(b):
    """
    Search `/etc` for configuration files and add them to the blueprint.

    `b` must provide `add_file`, `add_service` and `add_service_package`.
    Every file that survives the filters below is passed to `b.add_file`;
    when `util.parse_service` recognizes its pathname, the service and the
    packages that own the file are added as well.
    """
    logging.info('searching for configuration files')

    # Visit every file in `/etc` except those on the exclusion list above.
    for dirpath, dirnames, filenames in os.walk('/etc'):

        # Determine if this entire directory should be ignored by default.
        ignored = ignore.file(dirpath)

        # Collect up the full pathname to each file, `lstat` them all, and
        # note which ones will probably be ignored.
        entries = []
        for filename in filenames:
            pathname = os.path.join(dirpath, filename)
            try:
                entries.append((pathname,
                                os.lstat(pathname),
                                ignore.file(pathname, ignored)))
            except OSError as e:
                logging.warning('{0} caused {1} - try running as root'.
                                format(pathname, errno.errorcode[e.errno]))

        # Track the ctime of each file in this directory.  Weed out false
        # positives by ignoring files with common ctimes.
        ctimes = defaultdict(int)

        # Map the ctimes of each directory entry that isn't being ignored.
        for pathname, s, file_ignored in entries:
            if not file_ignored:
                ctimes[s.st_ctime] += 1
        for dirname in dirnames:
            try:
                ctimes[os.lstat(os.path.join(dirpath, dirname)).st_ctime] += 1
            except OSError:
                pass

        for pathname, s, file_ignored in entries:

            # Make sure this pathname will actually be able to be included
            # in the blueprint.  This is a bit of a cop-out since the file
            # could be important but at least it's not a crashing bug.
            try:
                pathname = unicode(pathname)
            except UnicodeDecodeError:
                logging.warning('{0} not UTF-8 - skipping it'.
                                format(repr(pathname)[1:-1]))
                continue

            # Ignore ignored files and files that share their ctime with other
            # files in the directory.  This is a very strong indication that
            # the file is original to the system and should be ignored.
            # `and` binds tighter than `or`: an ignored file is always
            # skipped, while a shared ctime only skips the file when
            # `ignore.file(pathname, True)` agrees.
            if file_ignored or (1 < ctimes[s.st_ctime]
                                and ignore.file(pathname, True)):
                continue

            # The content is used even for symbolic links to determine whether
            # it has changed from the packaged version.  Unreadable files are
            # skipped silently.
            try:
                with open(pathname) as f:
                    content = f.read()
            except IOError:
                continue

            # Ignore files that are from the `base-files` package (which
            # doesn't include MD5 sums for every file for some reason).
            apt_packages = _dpkg_query_S(pathname)
            if 'base-files' in apt_packages:
                continue

            # Ignore files that are unchanged from their packaged version,
            # or match in MD5SUMS.  The `MD5SUMS` entry is copied so the
            # checksums appended here are not written back into the
            # module-level table.
            md5sums = list(MD5SUMS.get(pathname, []))
            md5sums.extend([_dpkg_md5sum(package, pathname)
                            for package in apt_packages])
            md5sum = _rpm_md5sum(pathname)
            if md5sum is not None:
                md5sums.append(md5sum)
            # The SHA-256 digest is only computed when at least one recorded
            # checksum is 64 characters long.
            matches = hashlib.md5(content).hexdigest() in md5sums
            if not matches and 64 in [len(known or '') for known in md5sums]:
                matches = hashlib.sha256(content).hexdigest() in md5sums
            if matches and ignore.file(pathname, True):
                continue

            # A symbolic link's content is the link target.
            if stat.S_ISLNK(s.st_mode):
                content = os.readlink(pathname)

                # Ignore symbolic links providing backwards compatibility
                # between SystemV init and Upstart.
                if '/lib/init/upstart-job' == content:
                    continue

                # Ignore symbolic links into the Debian alternatives system.
                # These are almost certainly managed by packages.
                if content.startswith('/etc/alternatives/'):
                    continue

                encoding = 'plain'

            # A regular file is stored as plain text only if it is valid
            # UTF-8, which is required for JSON serialization.
            elif stat.S_ISREG(s.st_mode):
                try:
                    content = content.decode('utf_8')
                    encoding = 'plain'
                except UnicodeDecodeError:
                    content = base64.b64encode(content)
                    encoding = 'base64'

            # Other types, like FIFOs and sockets are not supported within
            # a blueprint and really shouldn't appear in `/etc` at all.
            else:
                logging.warning('{0} is not a regular file or symbolic link'.
                                format(pathname))
                continue

            # Fall back to the numeric IDs when the owner or group has no
            # entry in the password or group database.
            try:
                owner = pwd.getpwuid(s.st_uid).pw_name
            except KeyError:
                owner = s.st_uid
            try:
                group = grp.getgrgid(s.st_gid).gr_name
            except KeyError:
                group = s.st_gid

            b.add_file(pathname,
                       content=content,
                       encoding=encoding,
                       group=group,
                       mode='{0:o}'.format(s.st_mode),
                       owner=owner)

            # If this file is a service init script or config, create a
            # service resource.  `ValueError` is how a pathname that is not
            # a service is reported here.
            try:
                manager, service = util.parse_service(pathname)
                if not ignore.service(manager, service):
                    b.add_service(manager, service)
                    b.add_service_package(manager,
                                          service,
                                          'apt',
                                          *apt_packages)
                    b.add_service_package(manager,
                                          service,
                                          'yum',
                                          *_rpm_qf(pathname))
            except ValueError:
                pass
def files(b):
    """
    Search `/etc` for configuration files and add them to the blueprint.

    `b` must provide `add_file`, `add_service` and `add_service_package`.
    Every file that survives the filters below is passed to `b.add_file`;
    when `util.parse_service` recognizes its pathname, the service and the
    packages that own the file are added as well.
    """
    logging.info('searching for configuration files')

    # Visit every file in `/etc` except those on the exclusion list above.
    for dirpath, dirnames, filenames in os.walk('/etc'):

        # Determine if this entire directory should be ignored by default.
        ignored = ignore.file(dirpath)

        # Collect up the full pathname to each file, `lstat` them all, and
        # note which ones will probably be ignored.
        entries = []
        for filename in filenames:
            pathname = os.path.join(dirpath, filename)
            try:
                entries.append((pathname,
                                os.lstat(pathname),
                                ignore.file(pathname, ignored)))
            except OSError as e:
                logging.warning('{0} caused {1} - try running as root'.
                                format(pathname, errno.errorcode[e.errno]))

        # Track the ctime of each file in this directory.  Weed out false
        # positives by ignoring files with common ctimes.
        ctimes = defaultdict(int)

        # Map the ctimes of each directory entry that isn't being ignored.
        for pathname, s, file_ignored in entries:
            if not file_ignored:
                ctimes[s.st_ctime] += 1
        for dirname in dirnames:
            try:
                ctimes[os.lstat(os.path.join(dirpath, dirname)).st_ctime] += 1
            except OSError:
                pass

        for pathname, s, file_ignored in entries:

            # Always ignore block special files, character special files,
            # pipes, and sockets.  They end up looking like deadlocks.
            if stat.S_ISBLK(s.st_mode) \
                    or stat.S_ISCHR(s.st_mode) \
                    or stat.S_ISFIFO(s.st_mode) \
                    or stat.S_ISSOCK(s.st_mode):
                continue

            # Make sure this pathname will actually be able to be included
            # in the blueprint.  This is a bit of a cop-out since the file
            # could be important but at least it's not a crashing bug.
            try:
                pathname = unicode(pathname)
            except UnicodeDecodeError:
                logging.warning('{0} not UTF-8 - skipping it'.
                                format(repr(pathname)[1:-1]))
                continue

            # Ignore ignored files and files that share their ctime with other
            # files in the directory.  This is a very strong indication that
            # the file is original to the system and should be ignored.
            # `and` binds tighter than `or`: an ignored file is always
            # skipped, while a shared ctime only skips the file when
            # `ignore.file(pathname, True)` agrees.
            if file_ignored or (1 < ctimes[s.st_ctime]
                                and ignore.file(pathname, True)):
                continue

            # Check for a Mustache template and an optional shell script
            # that templatize this file.
            try:
                with open(
                        '{0}.blueprint-template.mustache'.format(pathname)) as f:
                    template = f.read()
            except IOError:
                template = None
            try:
                with open('{0}.blueprint-template.sh'.format(pathname)) as f:
                    data = f.read()
            except IOError:
                data = None

            # The content is used even for symbolic links to determine whether
            # it has changed from the packaged version.  Unreadable files are
            # skipped silently.
            try:
                with open(pathname) as f:
                    content = f.read()
            except IOError:
                continue

            # Ignore files that are unchanged from their packaged version.
            if _unchanged(pathname, content):
                continue

            # Resolve the rest of the file's metadata from the
            # `/etc/passwd` and `/etc/group` databases, falling back to the
            # numeric IDs when there is no matching entry.
            try:
                owner = pwd.getpwuid(s.st_uid).pw_name
            except KeyError:
                owner = s.st_uid
            try:
                group = grp.getgrgid(s.st_gid).gr_name
            except KeyError:
                group = s.st_gid
            mode = '{0:o}'.format(s.st_mode)

            # A symbolic link's content is the link target.
            if stat.S_ISLNK(s.st_mode):
                content = os.readlink(pathname)

                # Ignore symbolic links providing backwards compatibility
                # between SystemV init and Upstart.
                if '/lib/init/upstart-job' == content:
                    continue

                # Ignore symbolic links into the Debian alternatives system.
                # These are almost certainly managed by packages.
                if content.startswith('/etc/alternatives/'):
                    continue

                b.add_file(pathname,
                           content=content,
                           encoding='plain',
                           group=group,
                           mode=mode,
                           owner=owner)

            # A regular file is stored as plain text only if it is valid
            # UTF-8, which is required for JSON serialization.  When a
            # template exists it is stored (with its optional data script)
            # in place of the file's content.
            else:
                kwargs = dict(group=group, mode=mode, owner=owner)
                try:
                    if template:
                        if data:
                            kwargs['data'] = data.decode('utf_8')
                        kwargs['template'] = template.decode('utf_8')
                    else:
                        kwargs['content'] = content.decode('utf_8')
                    kwargs['encoding'] = 'plain'
                except UnicodeDecodeError:
                    if template:
                        if data:
                            kwargs['data'] = base64.b64encode(data)
                        kwargs['template'] = base64.b64encode(template)
                    else:
                        kwargs['content'] = base64.b64encode(content)
                    kwargs['encoding'] = 'base64'
                b.add_file(pathname, **kwargs)

            # If this file is a service init script or config, create a
            # service resource.  `ValueError` is how a pathname that is not
            # a service is reported here.
            try:
                manager, service = util.parse_service(pathname)
                if not ignore.service(manager, service):
                    b.add_service(manager, service)
                    b.add_service_package(manager,
                                          service,
                                          'apt',
                                          *_dpkg_query_S(pathname))
                    b.add_service_package(manager,
                                          service,
                                          'yum',
                                          *_rpm_qf(pathname))
            except ValueError:
                pass
def _source(b, dirname, old_cwd):
    """
    Build a tarball of `dirname` and add it to the blueprint.

    A partial shallow copy of `dirname` is created under the current
    working directory using hard links, files on the exclusion lists are
    removed from it, and what remains is archived into a tarball named by
    the SHA1 sum of the archive.  The tarball is moved into `old_cwd` and
    registered with `b.add_source(dirname, tarname)`.

    Returns `None` in every case.  The function gives up early, after
    logging a warning, when a directory's ownership or mode cannot be
    copied or a file cannot be hard linked, and silently when adding the
    shallow copy to the archive raises `OSError`.
    """
    # `dirname[1:]` drops the first character, presumably the leading
    # slash of an absolute path -- TODO confirm against the caller.
    tmpname = os.path.join(os.getcwd(), dirname[1:].replace('/', '-'))

    exclude = []

    pattern_pip = re.compile(r'\.egg-info/installed-files.txt$')
    pattern_egg = re.compile(r'\.egg(?:-info)?(?:/|$)')
    pattern_pth = re.compile(
        r'lib/python[^/]+/(?:dist|site)-packages/easy-install.pth$')
    pattern_bin = re.compile(r'EASY-INSTALL(?:-ENTRY)?-SCRIPT')

    # Create a partial shallow copy of the directory.
    for dirpath, dirnames, filenames in os.walk(dirname):

        # Definitely ignore the shallow copy directory.
        if dirpath.startswith(tmpname):
            continue

        # Determine if this entire directory should be ignored by default.
        ignored = ignore.file(dirpath)

        dirpath2 = os.path.normpath(
            os.path.join(tmpname, os.path.relpath(dirpath, dirname)))

        # Create this directory in the shallow copy with matching mode, owner,
        # and owning group.  Suggest running as `root` if this doesn't work.
        os.mkdir(dirpath2)
        s = os.lstat(dirpath)
        try:
            try:
                os.lchown(dirpath2, s.st_uid, s.st_gid)
            except OverflowError:
                logging.warning(
                    '{0} has uid:gid {1}:{2} - using chown(1)'.format(
                        dirpath, s.st_uid, s.st_gid))
                p = subprocess.Popen(
                    ['chown', '{0}:{1}'.format(s.st_uid, s.st_gid), dirpath2],
                    close_fds=True)
                p.communicate()
            os.chmod(dirpath2, s.st_mode)
        except OSError as e:
            logging.warning('{0} caused {1} - try running as root'.format(
                dirpath, errno.errorcode[e.errno]))
            return

        for filename in filenames:
            pathname = os.path.join(dirpath, filename)

            if ignore.source(pathname, ignored):
                continue

            pathname2 = os.path.join(dirpath2, filename)

            # Exclude files that are part of the RubyGems package.
            # NOTE(review): this check used to `continue` an inner loop over
            # the two globs, which never skipped the file; it now skips it
            # as the comment above describes.
            rubygems_globs = (
                os.path.join(
                    '/usr/lib/ruby/gems/*/gems/rubygems-update-*/lib',
                    pathname[1:]),
                os.path.join(
                    '/var/lib/gems/*/gems/rubygems-update-*/lib',
                    pathname[1:]))
            if any(glob.glob(globname) for globname in rubygems_globs):
                continue

            # Remember the path to all of `pip`'s `installed_files.txt` files.
            if pattern_pip.search(pathname):
                with open(pathname) as f:
                    exclude.extend([os.path.join(dirpath2, line.rstrip())
                                    for line in f])

            # Likewise remember the path to Python eggs.
            if pattern_egg.search(pathname):
                exclude.append(pathname2)

            # Exclude `easy_install`'s bookkeeping file, too.
            if pattern_pth.search(pathname):
                continue

            # Exclude executable placed by Python packages.  A file that
            # cannot be read is kept.
            if pathname.startswith('/usr/local/bin/'):
                try:
                    with open(pathname) as f:
                        if pattern_bin.search(f.read()):
                            continue
                except IOError:
                    pass

            # Exclude share/applications/mimeinfo.cache, whatever that is.
            if '/usr/local/share/applications/mimeinfo.cache' == pathname:
                continue

            # Clean up dangling symbolic links.  This makes the assumption
            # that no one intends to leave dangling symbolic links hanging
            # around, which I think is a good assumption.
            if stat.S_ISLNK(os.lstat(pathname).st_mode):
                try:
                    os.stat(pathname)
                except OSError as e:
                    if errno.ENOENT == e.errno:
                        logging.warning(
                            'ignored dangling symbolic link {0}'.format(
                                pathname))
                        continue

            # Hard link this file into the shallow copy.  Suggest running as
            # `root` if this doesn't work though in practice the check above
            # will have already caught this problem.
            try:
                os.link(pathname, pathname2)
            except OSError as e:
                logging.warning('{0} caused {1} - try running as root'.format(
                    pathname, errno.errorcode[e.errno]))
                return

    # Unlink files that were remembered for exclusion above.  Directories
    # and paths that were never linked are expected and skipped.
    for pathname in exclude:
        try:
            os.unlink(pathname)
        except OSError as e:
            if e.errno not in (errno.EISDIR, errno.ENOENT):
                raise

    # Remove empty directories.  For any that hang around, match their
    # access and modification times to the source, otherwise the hash of
    # the tarball will not be deterministic.
    for dirpath, dirnames, filenames in os.walk(tmpname, topdown=False):
        try:
            os.rmdir(dirpath)
        except OSError:
            s = os.lstat(
                os.path.join(dirname, os.path.relpath(dirpath, tmpname)))
            os.utime(dirpath, (s.st_atime, s.st_mtime))

    # If the shallow copy still exists, create a tarball named by its
    # SHA1 sum and include it in the blueprint.  The archive is opened
    # outside the `try` so that `tar` is always bound when it is closed.
    tar = tarfile.open('tmp.tar', 'w')
    try:
        tar.add(tmpname, '.')
    except OSError:
        return
    finally:
        tar.close()
    sha1 = hashlib.sha1()
    with open('tmp.tar', 'rb') as f:
        for buf in iter(lambda: f.read(4096), b''):
            sha1.update(buf)
    tarname = '{0}.tar'.format(sha1.hexdigest())
    shutil.move('tmp.tar', os.path.join(old_cwd, tarname))
    b.add_source(dirname, tarname)