def compare_zip_files(path1, path2, source=None):
    """Compare two zip archives: shared members first, then the listings.

    Every non-directory member name present in both archives is extracted
    into a temporary directory and passed to
    ``debbindiff.comparators.compare_files``; the extracted copies are
    removed right after.  The archives are then compared through the
    ``Zipinfo`` command, retrying with ``ZipinfoVerbose`` when the first one
    reports nothing.

    ``source`` is accepted but not used here.  ``BadZipfile`` is logged and
    swallowed, so the caller gets back whatever was collected up to that
    point (an empty list when the archives could not be opened at all).
    """
    collected = []
    try:
        with ZipFile(path1, 'r') as archive1, \
                ZipFile(path2, 'r') as archive2:
            # look up differences in content
            with make_temp_directory() as workdir1, \
                    make_temp_directory() as workdir2:
                shared = set(archive1.namelist()) & set(archive2.namelist())
                for member in sorted(shared):
                    if member.endswith('/'):
                        # skip directories
                        continue
                    logger.debug('extract member %s', member)
                    extracted1 = archive1.extract(member, workdir1)
                    extracted2 = archive2.extract(member, workdir2)
                    collected += debbindiff.comparators.compare_files(
                        extracted1, extracted2, source=member)
                    os.unlink(extracted1)
                    os.unlink(extracted2)
            # look up differences in metadata; search harder with the
            # verbose listing when the plain one shows nothing
            listing = (Difference.from_command(Zipinfo, path1, path2) or
                       Difference.from_command(ZipinfoVerbose, path1, path2))
            if listing:
                collected.append(listing)
    except BadZipfile:
        logger.debug('Either %s or %s is not a zip file.' % (path1, path2))
        # we'll fallback on binary comparison
    return collected
def compare_iso9660_files(path1, path2, source=None):
    """Compare two ISO 9660 images: metadata first, then shared files.

    The primary volume descriptor and the three directory listings (plain,
    Joliet and Rock Ridge) are compared through ``Difference.from_command``.
    Every file name present in both images is then extracted into a
    temporary directory and compared with
    ``debbindiff.comparators.compare_files``.

    ``source`` is accepted but not used here.

    Returns a flat list of differences; commands that report no difference
    contribute nothing to it.
    """
    differences = []
    # compare metadata; from_command may yield nothing when outputs match,
    # so only keep actual results
    difference = Difference.from_command(ISO9660PVD, path1, path2)
    if difference:
        differences.append(difference)
    for extension in (None, 'joliet', 'rockridge'):
        difference = Difference.from_command(
            ISO9660Listing, path1, path2, command_args=(extension,))
        if difference:
            differences.append(difference)
    # compare files contained in image
    files1 = get_iso9660_names(path1)
    files2 = get_iso9660_names(path2)
    with make_temp_directory() as temp_dir1:
        with make_temp_directory() as temp_dir2:
            for name in sorted(set(files1).intersection(files2)):
                logger.debug('extract file %s', name)
                in_path1 = os.path.join(temp_dir1, os.path.basename(name))
                in_path2 = os.path.join(temp_dir2, os.path.basename(name))
                with open(in_path1, 'w') as dest:
                    extract_from_iso9660(path1, name, dest)
                with open(in_path2, 'w') as dest:
                    extract_from_iso9660(path2, name, dest)
                # compare_files hands back a list: extend keeps ours flat
                differences.extend(debbindiff.comparators.compare_files(
                    in_path1, in_path2, source=name))
    return differences
def output_difference(difference, print_func, parents):
    """Write the HTML for ``difference`` and, recursively, its details.

    ``print_func`` receives each HTML fragment.  ``parents`` is the list of
    source names of the enclosing differences; it is used to build the
    anchor of this difference and is passed down, extended with this
    difference's own source, to the nested details.

    ``PrintLimitReached`` is logged and re-raised; the outer ``<div>`` is
    closed in every case (with ``force=True``) before leaving.
    """
    logger.debug('html output for %s', difference.source1)
    sources = parents + [difference.source1]
    print_func(u"<div class='difference'>")
    try:
        print_func(u"<div class='diffheader'>")
        if difference.source1 == difference.source2:
            # the span has to be closed here: the enclosing <div> is only
            # closed after the anchor below
            print_func(u"<div><span class='source'>%s</span>"
                       % escape(difference.source1))
        else:
            print_func(u"<div><span class='source'>%s</span> vs.</div>"
                       % escape(difference.source1))
            print_func(u"<div><span class='source'>%s</span>"
                       % escape(difference.source2))
        # source names come from the compared files: escape them before
        # putting them inside single-quoted attribute values
        anchor = escape('/'.join(sources[1:])).replace("'", '&#39;')
        print_func(u" <a class='anchor' href='#%s' name='%s'>¶</a>"
                   % (anchor, anchor))
        print_func(u"</div>")
        if difference.comment:
            print_func(u"<div class='comment'>%s</div>"
                       % escape(difference.comment).replace('\n', '<br />'))
        print_func(u"</div>")
        if difference.unified_diff:
            output_unified_diff(print_func, difference.unified_diff)
        for detail in difference.details:
            output_difference(detail, print_func, sources)
    except PrintLimitReached:
        logger.debug('print limit reached')
        raise
    finally:
        print_func(u"</div>", force=True)
def compare_iso9660_files(path1, path2, source=None):
    """Diff two ISO 9660 images.

    Collects, in order: the difference between the primary volume
    descriptors, the differences between the plain, Joliet and Rock Ridge
    listings, and the differences between every file present under the same
    name in both images (each one extracted to a temporary directory first).

    ``source`` is accepted but not used here.
    """
    found = []

    def keep(candidate):
        # only results that evaluate as true are recorded
        if candidate:
            found.append(candidate)

    # compare metadata
    keep(Difference.from_command(ISO9660PVD, path1, path2))
    for flavour in (None, 'joliet', 'rockridge'):
        keep(Difference.from_command(ISO9660Listing, path1, path2,
                                     command_args=(flavour, )))

    # compare files contained in image
    names1 = get_iso9660_names(path1)
    names2 = get_iso9660_names(path2)
    with make_temp_directory() as workdir1, \
            make_temp_directory() as workdir2:
        for name in sorted(set(names1) & set(names2)):
            logger.debug('extract file %s' % name)
            extracted = []
            for image, workdir in ((path1, workdir1), (path2, workdir2)):
                target = os.path.join(workdir, os.path.basename(name))
                with open(target, 'w') as dest:
                    extract_from_iso9660(image, name, dest)
                extracted.append(target)
            found += debbindiff.comparators.compare_files(
                extracted[0], extracted[1], source=name)
    return found
def compare_meta(path1, path2):
    """Compare the filesystem metadata of ``path1`` and ``path2``.

    Three sources are consulted in turn: the ``Stat`` command, the output of
    ``lsattr`` and the ``Getfacl`` command.  A source whose tool is missing
    (``RequiredToolNotFound``) is skipped with a log message instead of
    aborting the whole comparison.

    Returns the list of differences found, possibly empty.
    """
    logger.debug('compare_meta(%s, %s)', path1, path2)
    differences = []
    try:
        difference = Difference.from_command(Stat, path1, path2)
        if difference:
            differences.append(difference)
    except RequiredToolNotFound:
        logger.warning("'stat' not found! Is PATH wrong?")
    try:
        lsattr1 = lsattr(path1)
        lsattr2 = lsattr(path2)
        # label the result after the tool that produced it
        difference = Difference.from_unicode(
            lsattr1, lsattr2, path1, path2, source="lsattr")
        if difference:
            differences.append(difference)
    except RequiredToolNotFound:
        logger.info("Unable to find 'lsattr'.")
    try:
        difference = Difference.from_command(Getfacl, path1, path2)
        if difference:
            differences.append(difference)
    except RequiredToolNotFound:
        logger.info("Unable to find 'getfacl'.")
    return differences
def compare_zip_files(path1, path2, source=None):
    """Compare two zip archives member by member, then by their metadata.

    Members present in both archives (directories excluded) are extracted
    into temporary directories, compared with
    ``debbindiff.comparators.compare_files`` and removed again.  The archive
    listings are then compared through ``Zipinfo``, falling back to
    ``ZipinfoVerbose`` when the former reports nothing.

    ``source`` is accepted but not used here.

    Returns a flat list of differences.  ``BadZipfile`` is logged and
    swallowed, so the list holds whatever was collected before the error.
    """
    differences = []
    try:
        with ZipFile(path1, 'r') as zip1:
            with ZipFile(path2, 'r') as zip2:
                # look up differences in content
                with make_temp_directory() as temp_dir1:
                    with make_temp_directory() as temp_dir2:
                        for name in sorted(set(zip1.namelist())
                                           .intersection(zip2.namelist())):
                            # skip directories
                            if name.endswith('/'):
                                continue
                            logger.debug('extract member %s', name)
                            in_path1 = zip1.extract(name, temp_dir1)
                            in_path2 = zip2.extract(name, temp_dir2)
                            # compare_files hands back a list: extend keeps
                            # ours flat
                            differences.extend(
                                debbindiff.comparators.compare_files(
                                    in_path1, in_path2, source=name))
                            os.unlink(in_path1)
                            os.unlink(in_path2)
                # look up differences in metadata
                difference = Difference.from_command(Zipinfo, path1, path2)
                if not difference:
                    # search harder
                    difference = Difference.from_command(
                        ZipinfoVerbose, path1, path2)
                # both commands may agree: never record an empty result
                if difference:
                    differences.append(difference)
    except BadZipfile:
        logger.debug('Either %s or %s is not a zip file.', path1, path2)
        # we'll fallback on binary comparison
    return differences
def run_diff(fd1, fd2, end_nl_q1, end_nl_q2):
    """Run ``diff -au7`` on two open file descriptors and parse its output.

    ``fd1`` and ``fd2`` are handed to diff as ``/dev/fd/N`` paths and are
    closed in this process once the child has been started.  ``end_nl_q1``
    and ``end_nl_q2`` are passed through to ``DiffParser``.

    Returns ``None`` when diff exits with status 0, otherwise the parser's
    ``diff`` attribute.

    Raises ``subprocess.CalledProcessError`` when the parser did not succeed
    and diff exited with a status other than 0 or 1.
    """
    logger.debug('running diff')
    cmd = ['diff', '-au7', '/dev/fd/%d' % fd1, '/dev/fd/%d' % fd2]

    def close_fds():
        # Runs in the child before exec: keep only stdout, stderr and the
        # two descriptors diff has to read.
        fds = [int(fd) for fd in os.listdir('/dev/fd')
               if int(fd) not in (1, 2, fd1, fd2)]
        for fd in fds:
            try:
                os.close(fd)
            except OSError:
                pass

    p = subprocess.Popen(cmd, shell=False, bufsize=1,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT, preexec_fn=close_fds)
    p.stdin.close()
    os.close(fd1)
    os.close(fd2)
    parser = DiffParser(p.stdout, end_nl_q1, end_nl_q2)
    t_read = Thread(target=parser.parse)
    t_read.daemon = True
    t_read.start()
    t_read.join()
    p.wait()
    if not parser.success and p.returncode not in (0, 1):
        # CalledProcessError takes (returncode, cmd, output)
        raise subprocess.CalledProcessError(
            p.returncode, cmd, output=parser.diff)
    if p.returncode == 0:
        return None
    return parser.diff
def extract_squashfs(path, destdir):
    """Unpack the squashfs image ``path`` into ``destdir`` with unsquashfs.

    The tool's standard output is captured and discarded.  A non-zero exit
    status is logged as an error; nothing is raised or returned.
    """
    argv = ['unsquashfs', '-n', '-f', '-d', destdir, path]
    logger.debug("extracting %s into %s", path, destdir)
    unsquashfs = subprocess.Popen(argv, shell=False, stdout=subprocess.PIPE)
    unsquashfs.communicate()
    exit_status = unsquashfs.wait()
    if exit_status != 0:
        logger.error('unsquashfs exited with error code %d', exit_status)
def extract_cpio_archive(path, destdir):
    """Unpack the cpio archive ``path`` into ``destdir``.

    cpio is started with ``destdir`` as its working directory and reads the
    archive from the absolute form of the UTF-8 encoded ``path``.  A
    non-zero exit status is logged as an error; nothing is raised or
    returned.
    """
    encoded_path = path.encode('utf-8')
    argv = ['cpio', '--no-absolute-filenames', '--quiet', '-idF',
            os.path.abspath(encoded_path)]
    logger.debug("extracting %s into %s", encoded_path, destdir)
    cpio = subprocess.Popen(argv, shell=False, cwd=destdir)
    cpio.communicate()
    exit_status = cpio.wait()
    if exit_status != 0:
        logger.error('cpio exited with error code %d', exit_status)
def extract_cpio_archive(path, destdir):
    """Extract a cpio archive into ``destdir`` by running the cpio tool.

    The archive location is given to cpio as an absolute path built from
    the UTF-8 encoded ``path``, because the process runs with ``destdir``
    as its current directory.  Failures are only reported through the
    logger.
    """
    archive = os.path.abspath(path.encode('utf-8'))
    options = ['--no-absolute-filenames', '--quiet', '-idF']
    logger.debug("extracting %s into %s", path.encode('utf-8'), destdir)
    process = subprocess.Popen(['cpio'] + options + [archive],
                               shell=False, cwd=destdir)
    process.communicate()
    process.wait()
    if process.returncode == 0:
        return
    logger.error('cpio exited with error code %d', process.returncode)
def validate_checksums(self, check_hash="sha1"):
    """
    Validate checksums for a package, using ``check_hash``'s type
    to validate the package.

    Valid ``check_hash`` types:

        * sha1
        * sha256
        * md5
        * md5sum (same as md5)

    Raises ``ValueError`` for any other ``check_hash`` and
    ``ChangesFileException`` when a file has no checksum entry or when its
    computed digest differs from the recorded one.
    """
    logger.debug("validating %s checksums", check_hash)
    # check_hash -> (hash constructor, field holding the checksums,
    #                key of the digest inside each entry)
    algorithms = {
        "sha1": (hashlib.sha1, "Checksums-Sha1", "sha1"),
        "sha256": (hashlib.sha256, "Checksums-Sha256", "sha256"),
        "md5": (hashlib.md5, "Files", "md5sum"),
        "md5sum": (hashlib.md5, "Files", "md5sum"),
    }
    if check_hash not in algorithms:
        raise ValueError("Unsupported check_hash: %r" % (check_hash,))
    new_hash, checksums_field, field_name = algorithms[check_hash]

    for filename in self.get_files():
        hash_type = new_hash()
        checksums = self.get(checksums_field)

        changed_files = None  # appease pylint
        for changed_files in checksums:
            if changed_files['name'] == os.path.basename(filename):
                break
        else:
            # no entry matched: comparing against an unrelated entry (or
            # None) would only produce a misleading error
            raise ChangesFileException(
                "get_files() returns different files than Files: knows?! "
                "No %s entry for file %s" % (field_name, filename))

        with open(os.path.join(self._directory, filename), "rb") as fc:
            # hash in chunks to keep memory use flat on large files
            for chunk in iter(lambda: fc.read(131072), b""):
                hash_type.update(chunk)

        if hash_type.hexdigest() != changed_files[field_name]:
            raise ChangesFileException(
                "Checksum mismatch for file %s: %s != %s" % (
                    filename,
                    hash_type.hexdigest(),
                    changed_files[field_name]
                ))
        logger.debug("%s Checksum for file %s matches",
                     field_name, filename)
def validate_checksums(self, check_hash="sha1"):
    """
    Validate checksums for a package, using ``check_hash``'s type
    to validate the package.

    Valid ``check_hash`` types:

        * sha1
        * sha256
        * md5
        * md5sum (same as md5)

    Raises ``ValueError`` for any other ``check_hash`` and
    ``ChangesFileException`` when a file has no checksum entry or when its
    computed digest differs from the recorded one.
    """
    logger.debug("validating %s checksums", check_hash)
    if check_hash == "sha1":
        new_hash = hashlib.sha1
        checksums_field = "Checksums-Sha1"
        field_name = "sha1"
    elif check_hash == "sha256":
        new_hash = hashlib.sha256
        checksums_field = "Checksums-Sha256"
        field_name = "sha256"
    elif check_hash in ("md5", "md5sum"):
        new_hash = hashlib.md5
        checksums_field = "Files"
        field_name = "md5sum"
    else:
        raise ValueError("Unsupported check_hash: %r" % (check_hash,))

    for filename in self.get_files():
        # a fresh hash object per file: digests must not accumulate
        hash_type = new_hash()
        checksums = self.get(checksums_field)

        changed_files = None  # appease pylint
        for changed_files in checksums:
            if changed_files['name'] == os.path.basename(filename):
                break
        else:
            # no entry matched: fail explicitly instead of comparing
            # against whichever entry happened to come last
            raise ChangesFileException(
                "get_files() returns different files than Files: knows?! "
                "No %s entry for file %s" % (field_name, filename))

        with open(os.path.join(self._directory, filename), "rb") as fc:
            while True:
                chunk = fc.read(131072)
                if not chunk:
                    break
                hash_type.update(chunk)

        if hash_type.hexdigest() != changed_files[field_name]:
            raise ChangesFileException(
                "Checksum mismatch for file %s: %s != %s" %
                (filename, hash_type.hexdigest(),
                 changed_files[field_name]))
        logger.debug("%s Checksum for file %s matches",
                     field_name, filename)
def output_html(difference, css_url=None, print_func=None,
                max_page_size=None):
    """Render one difference as a complete HTML page.

    Output goes through ``print_func`` (``print`` when omitted), wrapped so
    that it stops after ``max_page_size`` (``DEFAULT_MAX_PAGE_SIZE`` when
    omitted).  When the limit is hit an error block is emitted in place of
    the remaining content; the footer is written in every case.
    """
    emit = print if print_func is None else print_func
    limit = DEFAULT_MAX_PAGE_SIZE if max_page_size is None else max_page_size
    emit = create_limited_print_func(emit, limit)
    try:
        output_header(css_url, emit)
        output_difference(difference, emit, [])
    except PrintLimitReached:
        logger.debug('print limit reached')
        emit(u"<div class='error'>Max output size reached.</div>",
             force=True)
    emit(FOOTER % {'version': VERSION}, force=True)
def compare_unknown(path1, path2, source=None):
    """Compare two files of unrecognised type, as text when both look textual.

    When both guessed MIME types start with ``text/`` the files go to
    ``compare_text_files``; the charset is passed along only if both types
    declare the same one, otherwise ``None`` is passed.  Anything else goes
    to ``compare_binary_files``.
    """
    logger.debug("compare unknown path: %s and %s", path1, path2)
    mime_type1 = guess_mime_type(path1)
    mime_type2 = guess_mime_type(path2)
    logger.debug("mime_type1: %s | mime_type2: %s", mime_type1, mime_type2)

    both_text = (mime_type1.startswith('text/') and
                 mime_type2.startswith('text/'))
    if not both_text:
        return compare_binary_files(path1, path2, source)

    charsets1 = re.findall(r'; charset=([^ ]+)', mime_type1)
    charsets2 = re.findall(r'; charset=([^ ]+)', mime_type2)
    # only trust the charset when both sides announce the same one
    encoding = charsets1[0] if charsets1 and charsets1 == charsets2 else None
    return compare_text_files(path1, path2, encoding, source)
def filter(self, line):
    """Re-encode one line of output to UTF-8.

    Until a line matching ``Msgunfmt.CHARSET_RE`` has been seen, lines are
    accumulated in ``self._header`` and an empty string is returned.  The
    matching line fixes ``self._encoding`` and releases the whole header,
    re-encoded.  An empty ``line`` received before that releases the header
    as is.  Afterwards each line is re-encoded on its own, or returned
    untouched when the encoding is already ``'utf-8'``.
    """
    if self._encoding:
        if self._encoding == 'utf-8':
            return line
        return line.decode(self._encoding).encode('utf-8')

    # still inside the header: the encoding is not known yet
    if line == '':
        logger.debug("unable to determine PO encoding, let's hope it's utf-8")
        return self._header
    self._header += line
    charset = Msgunfmt.CHARSET_RE.match(line)
    if not charset:
        return ''
    self._encoding = charset.group(1)
    return self._header.decode(self._encoding).encode('utf-8')
def filter(self, line):
    """Re-encode one line of output to UTF-8.

    While the encoding is unknown every line is written to ``self._header``.
    A line matching ``Msgunfmt.CHARSET_RE`` sets ``self._encoding`` (lower
    cased) and releases the buffered header re-encoded; a bare newline seen
    first makes the method assume UTF-8 and release the header as is.  Any
    other header line yields an empty string.  Once the encoding is known,
    lines are re-encoded one by one, or returned untouched for ``'utf-8'``.
    """
    if self._encoding:
        if self._encoding == 'utf-8':
            return line
        return line.decode(self._encoding).encode('utf-8')

    # still inside the header: buffer it until the charset shows up
    self._header.write(line)
    if line == '\n':
        logger.debug("unable to determine PO encoding, let's hope it's utf-8")
        self._encoding = 'utf-8'
        return self._header.getvalue()
    charset = Msgunfmt.CHARSET_RE.match(line)
    if not charset:
        return ''
    self._encoding = charset.group(1).lower()
    buffered = self._header.getvalue()
    return buffered.decode(self._encoding).encode('utf-8')
def filter(self, line):
    """Convert one line of output to UTF-8.

    Lines are collected in ``self._header`` (and an empty string returned)
    until one matches ``Msgunfmt.CHARSET_RE``; that line sets
    ``self._encoding`` and the collected header is returned re-encoded.  An
    empty ``line`` arriving before any match returns the header unchanged.
    After the encoding is set, lines are converted individually and passed
    through as they are when the encoding is ``'utf-8'``.
    """
    encoding = self._encoding
    if not encoding:
        if line == '':
            logger.debug(
                "unable to determine PO encoding, let's hope it's utf-8")
            return self._header
        self._header += line
        match = Msgunfmt.CHARSET_RE.match(line)
        if match:
            self._encoding = match.group(1)
            header = self._header
            return header.decode(self._encoding).encode('utf-8')
        return ''
    if encoding == 'utf-8':
        return line
    return line.decode(encoding).encode('utf-8')
def output_html(differences, css_url=None, print_func=None,
                max_page_size=None):
    """Render a sequence of differences as a complete HTML page.

    Output goes through ``print_func`` (``print`` when omitted), wrapped so
    that it stops after ``max_page_size`` (``DEFAULT_MAX_PAGE_SIZE`` when
    omitted).  When the limit is hit the remaining differences are dropped
    and an error block is emitted; the footer is written in every case.
    """
    emit = print if print_func is None else print_func
    limit = DEFAULT_MAX_PAGE_SIZE if max_page_size is None else max_page_size
    emit = create_limited_print_func(emit, limit)
    try:
        output_header(css_url, emit)
        for entry in differences:
            output_difference(entry, emit, [])
    except PrintLimitReached:
        logger.debug('print limit reached')
        emit(u"<div class='error'>Max output size reached.</div>",
             force=True)
    emit(FOOTER % {'version': VERSION}, force=True)
def compare_directories(path1, path2, source=None):
    """Compare two directories entry by entry, then as a whole.

    Every name present in both directories is compared with
    ``debbindiff.comparators.compare_files``.  For entries whose first path
    is not a directory, metadata differences from ``compare_meta`` are
    attached to the first content difference, or wrapped in a difference of
    their own when the contents match.  The ``ls`` listings and the metadata
    of the two directories themselves are compared last.

    Returns a one-element list holding a ``Difference`` for ``source`` with
    everything found as details, or an empty list when nothing differs.
    """
    differences = []
    # list each directory once and reuse the result below
    names1 = sorted(set(os.listdir(path1)))
    names2 = sorted(set(os.listdir(path2)))
    logger.debug('path1 files: %s', names1)
    logger.debug('path2 files: %s', names2)
    for name in sorted(set(names1).intersection(names2)):
        logger.debug('compare %s', name)
        in_path1 = os.path.join(path1, name)
        in_path2 = os.path.join(path2, name)
        in_differences = debbindiff.comparators.compare_files(
            in_path1, in_path2, source=name)
        if not os.path.isdir(in_path1):
            meta_differences = compare_meta(in_path1, in_path2)
            if in_differences:
                in_differences[0].add_details(meta_differences)
            elif meta_differences:
                # only wrap actual metadata differences: an empty wrapper
                # would make identical files show up as different
                d = Difference(None, path1, path2, source=name)
                d.add_details(meta_differences)
                in_differences = [d]
        differences.extend(in_differences)
    ls1 = ls(path1)
    ls2 = ls(path2)
    difference = Difference.from_unicode(ls1, ls2, path1, path2, source="ls")
    if difference:
        differences.append(difference)
    differences.extend(compare_meta(path1, path2))
    if differences:
        d = Difference(None, path1, path2, source=source)
        d.add_details(differences)
        return [d]
    return []
def compare_tar_files(path1, path2, source=None):
    """Compare two tar archives: shared regular files, then the listings.

    Every member name present in both archives that is a regular file on
    both sides is extracted into a temporary directory, compared with
    ``debbindiff.comparators.compare_files`` and removed again.  The output
    of ``get_tar_content`` for both archives is compared last, under the
    source name ``"metadata"``.

    ``source`` is accepted but not used here.

    Returns a flat list of differences, possibly empty.
    """
    differences = []
    with tarfile.open(path1, 'r') as tar1:
        with tarfile.open(path2, 'r') as tar2:
            # look up differences in content
            with make_temp_directory() as temp_dir1:
                with make_temp_directory() as temp_dir2:
                    logger.debug('content1 %s', tar1.getnames())
                    logger.debug('content2 %s', tar2.getnames())
                    for name in sorted(set(tar1.getnames())
                                       .intersection(tar2.getnames())):
                        member1 = tar1.getmember(name)
                        member2 = tar2.getmember(name)
                        if not member1.isfile() or not member2.isfile():
                            continue
                        logger.debug('extract member %s', name)
                        tar1.extract(name, temp_dir1)
                        tar2.extract(name, temp_dir2)
                        in_path1 = os.path.join(
                            temp_dir1, name).decode('utf-8')
                        in_path2 = os.path.join(
                            temp_dir2, name).decode('utf-8')
                        # compare_files hands back a list: extend keeps
                        # ours flat
                        differences.extend(
                            debbindiff.comparators.compare_files(
                                in_path1, in_path2,
                                source=name.decode('utf-8')))
                        os.unlink(in_path1)
                        os.unlink(in_path2)
            # look up differences in file list and file metadata
            content1 = get_tar_content(tar1).decode('utf-8')
            content2 = get_tar_content(tar2).decode('utf-8')
            difference = Difference.from_unicode(
                content1, content2, path1, path2, source="metadata")
            # identical listings produce nothing worth recording
            if difference:
                differences.append(difference)
    return differences
def compare_deb_files(path1, path2, source=None):
    """Compare two .deb packages: shared ar members, then the ar listings.

    Every member name present in both archives is written to a temporary
    directory, compared with ``debbindiff.comparators.compare_files`` and
    removed again.  The output of ``get_ar_content`` for both packages is
    compared last, under the source name ``"metadata"``.

    ``source`` is accepted but not used here.

    Returns the list of differences found, possibly empty.
    """
    differences = []
    # look up differences in content
    ar1 = ArFile(filename=path1)
    ar2 = ArFile(filename=path2)
    with make_temp_directory() as temp_dir1:
        with make_temp_directory() as temp_dir2:
            logger.debug('content1 %s', ar1.getnames())
            logger.debug('content2 %s', ar2.getnames())
            for name in sorted(set(ar1.getnames())
                               .intersection(ar2.getnames())):
                logger.debug('extract member %s', name)
                member1 = ar1.getmember(name)
                member2 = ar2.getmember(name)
                in_path1 = os.path.join(temp_dir1, name)
                in_path2 = os.path.join(temp_dir2, name)
                # members are raw bytes: write them through binary handles
                # so nothing gets translated on the way to disk
                with open(in_path1, 'wb') as f1:
                    f1.write(member1.read())
                with open(in_path2, 'wb') as f2:
                    f2.write(member2.read())
                differences.extend(
                    debbindiff.comparators.compare_files(
                        in_path1, in_path2, source=name))
                os.unlink(in_path1)
                os.unlink(in_path2)
    # look up differences in file list and file metadata
    content1 = get_ar_content(path1)
    content2 = get_ar_content(path2)
    difference = Difference.from_unicode(
        content1, content2, path1, path2, source="metadata")
    if difference:
        differences.append(difference)
    return differences
def join(self): ex_info = self.wait_for_exc_info() if ex_info is None: return else: except_type, except_class, tb = ex_info logger.debug('Exception: %s' % traceback.format_exception_only(except_type, except_class)[0].strip()) logger.debug('Traceback:') for line in traceback.format_list(tb): logger.debug(line[:-1]) raise except_type, except_class, None
def compare_tar_files(path1, path2, source=None):
    """Diff two tar archives.

    Regular files present under the same name in both archives are
    extracted to temporary directories, compared with
    ``debbindiff.comparators.compare_files`` and deleted again.  The
    ``get_tar_content`` output of both archives is then compared under the
    source name ``"metadata"`` and recorded when it differs.

    ``source`` is accepted but not used here.
    """
    found = []
    with tarfile.open(path1, 'r') as archive1, \
            tarfile.open(path2, 'r') as archive2:
        # look up differences in content
        with make_temp_directory() as workdir1, \
                make_temp_directory() as workdir2:
            logger.debug('content1 %s', archive1.getnames())
            logger.debug('content2 %s', archive2.getnames())
            shared = set(archive1.getnames()) & set(archive2.getnames())
            for member_name in sorted(shared):
                info1 = archive1.getmember(member_name)
                info2 = archive2.getmember(member_name)
                if not (info1.isfile() and info2.isfile()):
                    # only regular files on both sides are compared
                    continue
                logger.debug('extract member %s', member_name)
                archive1.extract(member_name, workdir1)
                archive2.extract(member_name, workdir2)
                extracted1 = os.path.join(workdir1, member_name)
                extracted2 = os.path.join(workdir2, member_name)
                found += debbindiff.comparators.compare_files(
                    extracted1, extracted2, source=member_name)
                os.unlink(extracted1)
                os.unlink(extracted2)
        # look up differences in file list and file metadata
        listing1 = get_tar_content(archive1).decode('utf-8')
        listing2 = get_tar_content(archive2).decode('utf-8')
        listing_difference = Difference.from_unicode(
            listing1, listing2, path1, path2, source="metadata")
        if listing_difference:
            found.append(listing_difference)
    return found
def compare_deb_files(path1, path2, source=None):
    """Compare two .deb packages: shared ar members, then the ar listings.

    Every member name present in both archives is written to a temporary
    directory, compared with ``debbindiff.comparators.compare_files`` and
    removed again.  The output of ``get_ar_content`` for both packages is
    compared last, under the source name ``"metadata"``.

    ``source`` is accepted but not used here.

    Returns the list of differences found, possibly empty.
    """
    differences = []
    # look up differences in content
    ar1 = ArFile(filename=path1)
    ar2 = ArFile(filename=path2)
    with make_temp_directory() as temp_dir1:
        with make_temp_directory() as temp_dir2:
            logger.debug('content1 %s', ar1.getnames())
            logger.debug('content2 %s', ar2.getnames())
            for name in sorted(
                    set(ar1.getnames()).intersection(ar2.getnames())):
                logger.debug('extract member %s', name)
                member1 = ar1.getmember(name)
                member2 = ar2.getmember(name)
                in_path1 = os.path.join(temp_dir1, name)
                in_path2 = os.path.join(temp_dir2, name)
                # ar members are binary payloads: a text-mode handle could
                # alter them, so open the targets in binary mode
                with open(in_path1, 'wb') as f1:
                    f1.write(member1.read())
                with open(in_path2, 'wb') as f2:
                    f2.write(member2.read())
                differences.extend(
                    debbindiff.comparators.compare_files(
                        in_path1, in_path2, source=name))
                os.unlink(in_path1)
                os.unlink(in_path2)
    # look up differences in file list and file metadata
    content1 = get_ar_content(path1)
    content2 = get_ar_content(path2)
    difference = Difference.from_unicode(
        content1, content2, path1, path2, source="metadata")
    if difference:
        differences.append(difference)
    return differences
if dot_changes1[field] != dot_changes2[field]: content1 = "%s: %s" % (field, dot_changes1[field]) content2 = "%s: %s" % (field, dot_changes2[field]) difference = Difference.from_unicode( content1, content2, dot_changes1.get_changes_file(), dot_changes2.get_changes_file(), source=source) if difference: differences.append(difference) # This will handle differences in the list of files, checksums, priority # and section files1 = dot_changes1.get('Files') files2 = dot_changes2.get('Files') logger.debug(dot_changes1.get_as_string('Files')) files_difference = Difference.from_unicode( dot_changes1.get_as_string('Files'), dot_changes2.get_as_string('Files'), dot_changes1.get_changes_file(), dot_changes2.get_changes_file(), source=source, comment="List of files does not match") if not files_difference: return differences files1 = dict([(d['name'], d) for d in files1]) files2 = dict([(d['name'], d) for d in files2])
files_difference = Difference.from_unicode( dot_changes1.get_as_string('Files'), dot_changes2.get_as_string('Files'), path1, path2, source='Files') if not files_difference: return differences differences.append(files_difference) # we are only interested in file names files1 = dict([(d['name'], d) for d in dot_changes1.get('Files')]) files2 = dict([(d['name'], d) for d in dot_changes2.get('Files')]) for filename in sorted(set(files1.keys()).intersection(files2.keys())): d1 = files1[filename] d2 = files2[filename] if d1['md5sum'] != d2['md5sum']: logger.debug("%s mentioned in .changes have " "differences", filename) differences.append( debbindiff.comparators.compare_files( dot_changes1.get_path(filename), dot_changes2.get_path(filename), source=get_source(dot_changes1.get_path(filename), dot_changes2.get_path(filename)))) return differences