def test_has_visuals(monkeypatch, gif3, gif4):
    """With visual diffs enabled, the second detail carries a PNG and a GIF visual."""
    monkeypatch.setattr(Config(), 'compute_visual_diffs', True)

    details = gif3.compare(gif4).details
    assert len(details) == 2

    visuals = details[1].visuals
    assert len(visuals) == 2
    assert visuals[0].data_type == 'image/png;base64'
    assert visuals[1].data_type == 'image/gif;base64'
def test_has_visuals(monkeypatch, image1, image2):
    """With visual diffs enabled, the first detail carries a PNG and a GIF visual."""
    monkeypatch.setattr(Config(), 'compute_visual_diffs', True)

    details = image1.compare(image2).details
    assert len(details) == 2

    visuals = details[0].visuals
    assert len(visuals) == 2
    assert visuals[0].data_type == 'image/png;base64'
    assert visuals[1].data_type == 'image/gif;base64'
def compare_details(self, other, source=None):
    """
    Return the Img2Txt content difference followed by the Identify
    metadata difference.

    When visual diffs are enabled and both images pass ``same_size``, the
    content difference additionally receives a pixel and a flicker visual.
    """
    content_diff = Difference.from_command(
        Img2Txt, self.path, other.path, source="Image content"
    )

    wants_visuals = (
        content_diff is not None
        and Config().compute_visual_diffs
        and same_size(self, other)
    )
    if wants_visuals:
        try:
            logger.debug(
                "Generating visual difference for %s and %s",
                self.path,
                other.path,
            )
            pixels = pixel_difference(self.path, other.path)
            flicker = flicker_difference(self.path, other.path)
            content_diff.add_visuals([pixels, flicker])
        except subprocess.CalledProcessError:  # noqa
            # Best effort: a failing helper simply means no visuals.
            pass

    metadata_diff = Difference.from_command(
        Identify, self.path, other.path, source="Image metadata"
    )
    return [content_diff, metadata_diff]
def compare_details(self, other, source=None):
    """
    Return the content difference (when conversion succeeds) followed by
    the Identify metadata difference.
    """
    results = []

    # img2txt does not support .ico files directly so convert to .PNG.
    try:
        png_a = ICOImageFile.convert(self)
        png_b = ICOImageFile.convert(other)
    except subprocess.CalledProcessError:  # noqa
        converted = False
    else:
        converted = True

    if converted:
        content_diff = Difference.from_command(
            Img2Txt, png_a, png_b, source="Image content"
        )
        # A single short-circuit chain: each check only runs when all the
        # previous ones passed.
        wants_visuals = (
            content_diff is not None
            and Config().compute_visual_diffs
            and same_size(self, other)
            and get_image_size(self.path) == get_image_size(other.path)
        )
        if wants_visuals:
            logger.debug(
                "Generating visual difference for %s and %s",
                self.path,
                other.path,
            )
            pixels = pixel_difference(self.path, other.path)
            flicker = flicker_difference(self.path, other.path)
            content_diff.add_visuals([pixels, flicker])
        results.append(content_diff)

    metadata_diff = Difference.from_command(
        Identify, self.path, other.path, source="Image metadata"
    )
    results.append(metadata_diff)
    return results
def compare_details(self, other, source=None):
    """
    Return the Sng difference, plus an "Image content" difference holding
    pixel and flicker visuals when visual diffs are enabled and possible.
    """
    sng_diff = Difference.from_command(
        Sng, self.path, other.path, source='sng'
    )
    results = [sng_diff]

    wants_visuals = (
        sng_diff is not None
        and Config().compute_visual_diffs
        and same_size(self, other)
    )
    if not wants_visuals:
        return results

    try:
        logger.debug(
            "Generating visual difference for %s and %s",
            self.path,
            other.path,
        )
        visual_diff = Difference(
            None, self.path, other.path, source="Image content"
        )
        pixels = pixel_difference(self.path, other.path)
        flicker = flicker_difference(self.path, other.path)
        visual_diff.add_visuals([pixels, flicker])
    except subprocess.CalledProcessError:  # noqa
        # Best effort: report only the Sng difference.
        return results

    results.append(visual_diff)
    return results
def test_has_visuals(monkeypatch, png1, png2):
    """With visual diffs enabled, the second detail carries a PNG and a GIF visual."""
    monkeypatch.setattr(Config(), 'compute_visual_diffs', True)

    details = png1.compare(png2).details
    assert len(details) == 2

    visuals = details[1].visuals
    assert len(visuals) == 2
    assert visuals[0].data_type == 'image/png;base64'
    assert visuals[1].data_type == 'image/gif;base64'
def __init__(self, print_func, color):
    """Wrap ``print_func`` with the configured text report size limit."""
    size_limit = Config().max_text_report_size
    self.print_func = create_limited_print_func(print_func, size_limit)
    self.color = color
    super().__init__()
def compare_details(self, other, source=None):
    """
    Return the Gifbuild difference, plus an "Image content" difference
    holding pixel and flicker visuals when visual diffs are enabled and
    the two images can be composed.
    """
    gifbuild_diff = Difference.from_command(
        Gifbuild, self.path, other.path, source="gifbuild"
    )
    results = [gifbuild_diff]

    wants_visuals = (
        gifbuild_diff is not None
        and Config().compute_visual_diffs
        and can_compose_gif_images(self, other)
    )
    if not wants_visuals:
        return results

    try:
        logger.debug(
            "Generating visual difference for %s and %s",
            self.path,
            other.path,
        )
        visual_diff = Difference(
            None, self.path, other.path, source="Image content"
        )
        pixels = pixel_difference(self.path, other.path)
        flicker = flicker_difference(self.path, other.path)
        visual_diff.add_visuals([pixels, flicker])
    except subprocess.CalledProcessError:  # noqa
        # Best effort: report only the gifbuild difference.
        return results

    results.append(visual_diff)
    return results
def test_too_much_input_for_diff(monkeypatch):
    """Inputs one line over the configured limit produce the 'too much input' marker."""
    monkeypatch.setattr(Config(), 'max_diff_input_lines', 20)

    # 21 lines each: one more than the limit set above.
    reader_a = io.StringIO("a\n" * 21)
    reader_b = io.StringIO("b\n" * 21)

    result = Difference.from_text_readers(reader_a, reader_b, 'a', 'b')
    assert '[ Too much input for diff ' in result.unified_diff
def comparisons(self, other):
    """
    Yield ``(my_member, other_member, comment)`` triples to compare.

    Pairing order: the single members if both containers hold exactly one,
    otherwise same-named members, then fuzzy matches. With
    ``Config().new_file`` set, leftovers are paired with a ``MissingFile``.
    """
    mine = OrderedDict(self.get_adjusted_members_sizes())
    theirs = OrderedDict(other.get_adjusted_members_sizes())
    all_entries = itertools.chain(mine.values(), theirs.values())
    total_size = sum(entry[1] for entry in all_entries)

    # TODO: progress could be a bit more accurate here, give more weight to fuzzy-hashed files
    # TODO: merge DirectoryContainer.comparisons() into this
    with Progress(total_size) as p:

        def prep_yield(my_name, other_name, comment=NO_COMMENT):
            # Popping removes both entries from the pool of unmatched members.
            my_member, my_size = mine.pop(my_name)
            other_member, other_size = theirs.pop(other_name)
            p.begin_step(my_size + other_size, msg=my_member.progress_name)
            return my_member, other_member, comment

        # if both containers contain 1 element, compare these
        if len(mine) == 1 and len(theirs) == 1:
            yield prep_yield(next(iter(mine)), next(iter(theirs)))
            return

        their_names = set(theirs)
        # keep it sorted like my_members; must be a list because
        # prep_yield() mutates the dicts while we iterate.
        shared_names = [name for name in mine if name in their_names]
        for name in shared_names:
            yield prep_yield(name, name)

        for my_name, other_name, score in self.perform_fuzzy_matching(
            mine, theirs
        ):
            if score == 0:
                comment = "Files identical despite different names"
            else:
                comment = (
                    "Files similar despite different names"
                    " (score: {}, lower is more similar)".format(score)
                )
            yield prep_yield(my_name, other_name, comment)

        if Config().new_file:
            for my_member, my_size in mine.values():
                p.begin_step(my_size, msg=my_member.progress_name)
                placeholder = MissingFile('/dev/null', my_member)
                yield my_member, placeholder, NO_COMMENT

            for other_member, other_size in theirs.values():
                p.begin_step(other_size, msg=other_member.progress_name)
                placeholder = MissingFile('/dev/null', other_member)
                yield placeholder, other_member, NO_COMMENT
def filter_excludes(filenames):
    """Yield each filename that matches none of the ``Config().excludes`` patterns."""
    for name in filenames:
        excluded = False
        for pattern in Config().excludes:
            if fnmatch.fnmatchcase(name, pattern):
                logger.debug(
                    "Excluding %s as it matches pattern '%s'", name, pattern
                )
                excluded = True
                break
        if not excluded:
            yield name
def command_excluded(command):
    """Return True when ``command`` matches any ``Config().exclude_commands`` regex."""
    for pattern in Config().exclude_commands:
        if not re.search(pattern, command):
            continue
        logger.debug(
            "Excluding command '%s' as it matches pattern '%s'",
            command,
            pattern,
        )
        return True
    return False
def output_node_placeholder(self, pagename, lazy_load, size=0):
    """Return the lazy-load placeholder markup, or the limit marker when not lazy-loading."""
    if not lazy_load:
        return templates.DIFFNODE_LIMIT

    substitutions = {
        "pagename": pagename,
        "pagesize": sizeof_fmt(Config().max_page_size_child),
        "size": sizeof_fmt(size),
    }
    return templates.DIFFNODE_LAZY_LOAD % substitutions
def row_was_output():
    """
    Account for one emitted diff row and rotate to a new child page if needed.

    Increments the module-level row counter. Raises DiffBlockLimitReached
    once the applicable row limit is hit. When rotation parameters are
    present and the current page is full, switches the module-level printer
    to the next "<mainname>-<n>.html" file.
    """
    global spl_print_func, spl_print_ctrl, spl_rows, spl_current_page
    spl_rows += 1
    # Second element of the control tuple holds the rotation parameters;
    # it is falsy when no rotation is to be done.
    _, rotation_params = spl_print_ctrl
    max_lines = Config().max_diff_block_lines
    max_lines_parent = Config().max_diff_block_lines_parent
    max_lines_ratio = Config().max_diff_block_lines_html_dir_ratio
    max_report_child_size = Config().max_report_child_size
    if not rotation_params:
        # html-dir single output, don't need to rotate
        if spl_rows >= max_lines:
            raise DiffBlockLimitReached()
        return
    else:
        # html-dir output, perhaps need to rotate
        directory, mainname, css_url = rotation_params
        # The overall row limit is scaled by the html-dir ratio here.
        if spl_rows >= max_lines_ratio * max_lines:
            raise DiffBlockLimitReached()

        if spl_current_page == 0:  # on parent page
            if spl_rows < max_lines_parent:
                return
        else:  # on child page
            # TODO: make this stay below the max, instead of going 1 row over the max
            # will require some backtracking...
            if spl_print_func.bytes_written < max_report_child_size:
                return

    # Only reached when the current page is full: move on to the next page.
    spl_current_page += 1
    filename = "%s-%s.html" % (mainname, spl_current_page)

    if spl_current_page > 1:
        # previous page was a child, close it
        # The footer names the *next* page so it can be loaded from this one.
        spl_print_func(templates.UD_TABLE_FOOTER % {
            "filename": html.escape(filename),
            "text": "load diff"
        }, force=True)
        spl_print_exit(None, None, None)

    # rotate to the next child page
    context = spl_file_printer(directory, filename)
    spl_print_enter(context, rotation_params)
    spl_print_func(templates.UD_TABLE_HEADER)
def output_html_directory(directory, difference, css_url=None, jquery_url=None):
    """
    Multi-file presenter. Writes to a directory, and puts large diff tables
    into files of their own.

    This uses jQuery. When no URL is given, a ``jquery.js`` symlink to the
    first existing entry of JQUERY_SYSTEM_LOCATIONS is created inside the
    directory (symlinked, so that the result can still be shared over
    HTTP). Pass ``jquery_url='disable'`` to omit the scripts entirely.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)

    if not os.path.isdir(directory):
        raise ValueError("%s is not a directory" % directory)

    if not jquery_url:
        jquery_symlink = os.path.join(directory, "jquery.js")
        if os.path.exists(jquery_symlink):
            jquery_url = "./jquery.js"
        else:
            # A dangling symlink "lexists" but does not "exist": replace it.
            if os.path.lexists(jquery_symlink):
                os.unlink(jquery_symlink)
            for candidate in JQUERY_SYSTEM_LOCATIONS:
                if os.path.exists(candidate):
                    os.symlink(candidate, jquery_symlink)
                    jquery_url = "./jquery.js"
                    break
            else:
                # No candidate was found, so jquery_url is still unset.
                logger.warning(
                    '--jquery was not specified and jQuery was not found in any known location. Disabling on-demand inline loading.'
                )
                logger.debug(
                    'Locations searched: %s',
                    ', '.join(JQUERY_SYSTEM_LOCATIONS),
                )

    if jquery_url == 'disable':
        jquery_url = None

    with file_printer(directory, "index.html") as raw_print:
        print_func = create_limited_print_func(
            raw_print, Config().max_report_size
        )
        try:
            output_header(css_url, print_func)
            output_difference(difference, print_func, css_url, directory, [])
        except PrintLimitReached:
            logger.debug('print limit reached')
            print_func(
                u'<div class="error">Max output size reached.</div>',
                force=True,
            )
        if jquery_url:
            scripts = templates.SCRIPTS % {'jquery_url': html.escape(jquery_url)}
            print_func(scripts, force=True)
        output_footer(print_func)
def compare_details(self, other, source=None):
    """
    Return a one-element list holding the first truthy listing difference,
    or an empty list when directory metadata is excluded recursively.
    """
    if Config().exclude_directory_metadata == 'recursive':
        return []

    # Try each listing command in turn; stop at the first truthy result.
    # If none is truthy, the last result is kept (same as an ``or`` chain).
    for command in (MozillaZipinfo, MozillaZipinfoVerbose, BsdtarVerbose):
        zipinfo_difference = Difference.from_command(
            command, self.path, other.path
        )
        if zipinfo_difference:
            break

    return [zipinfo_difference]
def list_libarchive(path, ignore_errors=False):
    """
    Yield one newline-terminated listing line per entry of the archive.

    With ``Config().exclude_directory_metadata == 'recursive'`` only the
    entry name (and symlink target) is emitted; otherwise the line also
    carries mode, link count, owner, group, size or device numbers and
    modification time.

    :param path: archive path handed to ``libarchive.file_reader``.
    :param ignore_errors: when true, a ``libarchive.exception.ArchiveError``
        ends the listing silently instead of propagating.
    :raises libarchive.exception.ArchiveError: unless ``ignore_errors``.
    """
    try:
        with libarchive.file_reader(path) as archive:
            for entry in archive:
                name_and_link = entry.name
                if entry.issym:
                    name_and_link = '{entry.name} -> {entry.linkname}'.format(
                        entry=entry
                    )

                if Config().exclude_directory_metadata == 'recursive':
                    yield '{name_and_link}\n'.format(
                        name_and_link=name_and_link
                    )
                    continue

                # Block/character devices show "major,minor" instead of a size.
                if entry.isblk or entry.ischr:
                    size_or_dev = '{major:>3},{minor:>3}'.format(
                        major=entry.rdevmajor, minor=entry.rdevminor
                    )
                else:
                    size_or_dev = entry.size

                # UTC timestamp with the nanosecond part cut to microseconds.
                mtime = time.strftime(
                    '%Y-%m-%d %H:%M:%S', time.gmtime(entry.mtime)
                ) + '.{:06d}'.format(entry.mtime_nsec // 1000)

                if entry.uname:
                    # Same layout as the group column below. The previous
                    # format string had no placeholders, so the owner name
                    # and uid never reached the output.
                    user = '{user:<8} {uid:>7}'.format(
                        user=entry.uname.decode(
                            'utf-8', errors='surrogateescape'
                        ),
                        uid='({})'.format(entry.uid),
                    )
                else:
                    user = entry.uid

                if entry.gname:
                    group = '{group:<8} {gid:>7}'.format(
                        group=entry.gname.decode(
                            'utf-8', errors='surrogateescape'
                        ),
                        gid='({})'.format(entry.gid),
                    )
                else:
                    group = entry.gid

                yield '{strmode} {entry.nlink:>3} {user:>8} {group:>8} {size_or_dev:>8} {mtime:>8} {name_and_link}\n'.format(
                    strmode=entry.strmode.decode('us-ascii'),
                    entry=entry,
                    user=user,
                    group=group,
                    size_or_dev=size_or_dev,
                    mtime=mtime,
                    name_and_link=name_and_link,
                )
    except libarchive.exception.ArchiveError:
        if not ignore_errors:
            raise
def filter_excludes(filenames):
    """Return the filenames that match none of the ``Config().excludes`` patterns."""
    kept = []
    for name in filenames:
        excluded = False
        for pattern in Config().excludes:
            if fnmatch.fnmatchcase(name, pattern):
                logger.debug(
                    "Excluding %s as it matches pattern '%s'", name, pattern
                )
                excluded = True
                break
        if not excluded:
            kept.append(name)
    return kept
def compare_root_paths(path1, path2):
    """
    Compare two top-level paths.

    Returns None when either path is excluded, a directory comparison when
    both are directories, and a file comparison otherwise.
    """
    from ..directory import FilesystemDirectory, FilesystemFile, compare_directories

    if not Config().new_file:
        bail_if_non_existing(path1, path2)
    if any_excluded(path1, path2):
        return None

    both_directories = os.path.isdir(path1) and os.path.isdir(path2)
    if both_directories:
        return compare_directories(path1, path2)

    def load(path):
        # Each file gets its parent directory as its container.
        parent = FilesystemDirectory(os.path.dirname(path)).as_container
        return specialize(FilesystemFile(path, container=parent))

    file1 = load(path1)
    file2 = load(path2)
    return compare_files(file1, file2)
def output_html(difference, css_url=None, print_func=None):
    """
    Default presenter, all in one HTML file.

    Output goes through ``print_func`` (``print`` when omitted), capped at
    ``Config().max_report_size``.
    """
    sink = print if print_func is None else print_func
    print_func = create_limited_print_func(sink, Config().max_report_size)

    try:
        output_header(css_url, print_func)
        output_difference(difference, print_func, css_url, None, [])
    except PrintLimitReached:
        logger.debug('print limit reached')
        print_func(
            u'<div class="error">Max output size reached.</div>',
            force=True,
        )

    output_footer(print_func)
def comparisons(self, other):
    """
    Yield the parent's comparisons, skipping same-named members whose
    recorded md5sums are equal (unless ``Config().force_details`` is set).
    """
    my_md5sums = (
        self.source.container.source.container.source.md5sums
        if self.source
        else {}
    )
    other_md5sums = (
        other.source.container.source.container.source.md5sums
        if other.source
        else {}
    )

    for my_member, other_member, comment in super().comparisons(other):
        skip = False
        if not Config().force_details:
            if my_member.name == other_member.name:
                # Distinct defaults ('my' / 'other') make sure two missing
                # checksums never compare as equal.
                skip = my_md5sums.get(
                    my_member.name, 'my'
                ) == other_md5sums.get(other_member.name, 'other')

        if skip:
            logger.debug("Skip %s: identical md5sum", my_member.name)
            continue

        yield my_member, other_member, comment
def compare_details(self, other, source=None):
    """
    Return the non-None listing and Zipnote differences, in that order.

    The listing difference is skipped when directory metadata is excluded
    recursively.
    """
    zipinfo_difference = None
    if Config().exclude_directory_metadata != 'recursive':
        # Try each listing command in turn; stop at the first truthy
        # result, otherwise keep the last one (same as an ``or`` chain).
        for command in (Zipinfo, ZipinfoVerbose, BsdtarVerbose):
            zipinfo_difference = Difference.from_command(
                command, self.path, other.path
            )
            if zipinfo_difference:
                break

    zipnote_difference = Difference.from_command(
        Zipnote, self.path, other.path
    )

    candidates = (zipinfo_difference, zipnote_difference)
    return [diff for diff in candidates if diff is not None]
def _compare_using_details(self, other, source):
    """
    Build a Difference from ``compare_details`` and, for containers, from
    the comparison of the contents. Returns None when no truthy detail
    remains.
    """
    difference = Difference(None, self.name, other.name, source=source)
    details = []

    if hasattr(self, 'compare_details'):
        details.extend(self.compare_details(other, source))

    if self.as_container:
        if self.as_container.auto_diff_metadata:
            details.append(
                Difference.from_text(
                    self.magic_file_type,
                    other.magic_file_type,
                    self,
                    other,
                    source='filetype from file(1)',
                )
            )
            details.append(
                Difference.from_text(
                    self.__class__.__name__,
                    other.__class__.__name__,
                    self,
                    other,
                    source='filetype from diffoscope',
                )
            )

        # Don't recurse forever on archive quines, etc.
        depth = self._as_container.depth
        no_recurse = depth >= Config().max_container_depth
        if no_recurse:
            msg = "Reached max container depth ({})".format(depth)
            logger.debug(msg)
            difference.add_comment(msg)

        details.extend(
            self.as_container.compare(
                other.as_container, no_recurse=no_recurse
            )
        )

    # Drop falsy entries (e.g. None from comparisons that found nothing).
    kept = [detail for detail in details if detail]
    if kept:
        difference.add_details(kept)
        return difference
    return None
def compare_meta(path1, path2):
    """
    Return the non-None metadata differences (stat, getfacl, lsattr, xattr)
    between two paths.

    Returns an empty list when directory metadata is excluded or when
    either path does not exist. For symlinks only the stat result is used.
    """
    if Config().exclude_directory_metadata in ('yes', 'recursive'):
        logger.debug(
            "Excluding directory metadata for paths (%s, %s)", path1, path2
        )
        return []

    logger.debug('compare_meta(%s, %s)', path1, path2)
    found = []

    # Don't run any commands if any of the paths do not exist
    if not (os.path.exists(path1) and os.path.exists(path2)):
        return found

    try:
        found.append(Difference.from_command(Stat, path1, path2))
    except RequiredToolNotFound:
        logger.error("Unable to find 'stat'! Is PATH wrong?")

    if os.path.islink(path1) or os.path.islink(path2):
        return [diff for diff in found if diff is not None]

    try:
        found.append(Difference.from_command(Getfacl, path1, path2))
    except RequiredToolNotFound:
        logger.info(
            "Unable to find 'getfacl', some directory metadata differences might not be noticed."
        )

    try:
        attrs1 = lsattr(path1)
        attrs2 = lsattr(path2)
        found.append(
            Difference.from_text(
                attrs1, attrs2, path1, path2, source='lsattr'
            )
        )
    except RequiredToolNotFound:
        logger.info(
            "Unable to find 'lsattr', some directory metadata differences might not be noticed."
        )

    found.append(xattr(path1, path2))
    return [diff for diff in found if diff is not None]
def compare_files(file1, file2, source=None, diff_content_only=False):
    """
    Compare two file objects and return a Difference, or None.

    Identical content short-circuits to None unless
    ``Config().force_details`` is set. With ``diff_content_only`` the
    result is either None or a bare "Files differ" Difference.
    """
    logger.debug(
        "Comparing %s (%s) and %s (%s)",
        file1.name,
        file1.__class__.__name__,
        file2.name,
        file2.__class__.__name__,
    )

    if any_excluded(file1.name, file2.name):
        return None

    force_details = Config().force_details
    with profile('has_same_content_as', file1):
        has_same_content = file1.has_same_content_as(file2)

    if has_same_content and not force_details:
        logger.debug(
            "has_same_content_as returned True; skipping further comparisons"
        )
        return None

    if diff_content_only:
        if has_same_content:
            return None
        assert not has_same_content
        return Difference(None, file1.name, file2.name, comment="Files differ")

    specialize(file1)
    specialize(file2)

    if isinstance(file1, MissingFile):
        file1.other_file = file2
    elif isinstance(file2, MissingFile):
        file2.other_file = file1
    else:
        # Different file types, and at least one side is not a container:
        # only a byte-level comparison is attempted.
        types_differ = file1.__class__.__name__ != file2.__class__.__name__
        if types_differ and (
            file1.as_container is None or file2.as_container is None
        ):
            return file1.compare_bytes(file2, source)

    with profile('compare_files (cumulative)', file1):
        return file1.compare(file2, source)
def _compare_using_details(self, other, source):
    """
    Build a Difference from ``compare_details`` and, for containers, from
    the comparison of the contents. Returns None when no truthy detail
    remains.
    """
    difference = Difference(None, self.name, other.name, source=source)
    details = []

    if hasattr(self, 'compare_details'):
        details.extend(self.compare_details(other, source))

    if self.as_container:
        # Don't recurse forever on archive quines, etc.
        depth = self._as_container.depth
        no_recurse = depth >= Config().max_container_depth
        if no_recurse:
            msg = "Reached max container depth ({})".format(depth)
            logger.debug(msg)
            difference.add_comment(msg)

        details.extend(
            self.as_container.compare(
                other.as_container, no_recurse=no_recurse
            )
        )

    # Drop falsy entries (e.g. None from comparisons that found nothing).
    kept = [detail for detail in details if detail]
    if kept:
        difference.add_details(kept)
        return difference
    return None
def process_node(node, score):
    """
    Render one difference node and place it on an output page.

    The node's output is either merged into its ancestor's page, put on a
    new page of its own, or replaced by a placeholder when a size limit is
    hit. Relies on names from the enclosing scope (``self``, ``ctx``,
    ``ancestors``, ``outputs``, ``printers``, ``continuations``,
    ``root_difference``, ``placeholder_len``).

    Returns True to tell the caller not to traverse this node's children;
    raises StopIteration to ask it to stop traversing altogether.
    """
    # The fourth element of the score is the path of nodes leading here.
    path = score[3]
    diff_path = output_diff_path(path)
    pagename = md5(diff_path)
    logger.debug('html output for %s', diff_path)

    # The page this node was scheduled on, or None for the root.
    ancestor = ancestors.pop(node, None)
    assert ancestor in path or (ancestor is None and node is root_difference)
    node_output, node_continuation = output_node(
        ctx, node, path, " ", len(path) - 1)

    add_to_existing = False
    if ancestor:
        page_limit = (Config().max_page_size
                      if ancestor is root_difference
                      else Config().max_page_size_child)
        page_current = outputs[ancestor].size(placeholder_len)
        report_current = self.report_printed + sum(
            p.size(placeholder_len) for p in outputs.values())
        want_to_add = node_output.size(placeholder_len)
        logger.debug(
            "report size: %s/%s, page size: %s/%s, want to add %s)",
            report_current,
            self.report_limit,
            page_current,
            page_limit,
            want_to_add,
        )
        # NOTE: make_new_subpage is only bound on the first and last
        # branch; it is only read below when add_to_existing is False.
        if report_current + want_to_add > self.report_limit:
            make_new_subpage = False
        elif page_current + want_to_add < page_limit:
            add_to_existing = True
        else:
            make_new_subpage = not ctx.single_page

    if add_to_existing:
        # under limit, add it to an existing page
        outputs[ancestor] = outputs[ancestor].pformat(
            {node: node_output})
        stored = ancestor

    else:
        # over limit (or root), new subpage or continue/break
        if ancestor:
            # Fill this node's slot on the ancestor page with a placeholder.
            placeholder = self.output_node_placeholder(
                pagename, make_new_subpage, node.size())
            outputs[ancestor] = outputs[ancestor].pformat(
                {node: placeholder})
            self.maybe_print(ancestor, printers, outputs, continuations)
            footer = output_footer()
            # we hit a limit, either max-report-size or single-page
            if not make_new_subpage:
                if outputs:
                    # True = don't traverse this node's children,
                    # because they won't be output however there are
                    # holes in other pages, so don't break the loop
                    # just yet
                    return True
                # No more holes, don't traverse any more nodes
                raise StopIteration
        else:
            # Unconditionally write the root node regardless of limits
            assert node is root_difference
            footer = output_footer(ctx.jquery_url)
            pagename = "index"

        # Wrap the node output in a full page: header before, footer after.
        outputs[node] = node_output.frame(
            output_header(ctx.css_url, ctx.our_css_url, ctx.icon_url) +
            u'<div class="difference">\n',
            u'</div>\n' + footer,
        )
        assert not ctx.single_page or node is root_difference
        printers[node] = ((make_printer, ctx.target)
                          if ctx.single_page
                          else (file_printer, ctx.target, "%s.html" % pagename))
        stored = node

    # Children will later be attached to whichever page holds this node.
    for child in node.details:
        logger.debug(
            "scheduling future html output for: %s",
            output_diff_path(path + [child]),
        )
        ancestors[child] = stored

    conts = continuations.setdefault(stored, [])
    if node_continuation:
        conts.append(node_continuation)

    self.maybe_print(stored, printers, outputs, continuations)
def reset(self):
    """Zero the printed-size counter and re-read the report size limit."""
    self.report_printed = 0
    config = Config()
    self.report_limit = config.max_report_size
def __init__(self):
    """Snapshot the diff block and page size limits from the configuration."""
    self.max_lines = Config().max_diff_block_lines
    # only for html-dir
    # NOTE(review): on the collapsed source line it is unclear whether this
    # remark annotates the assignment above or the ones below - confirm.
    self.max_lines_parent = Config().max_page_diff_block_lines
    self.max_page_size_child = Config().max_page_size_child
def test_compare_non_existing(monkeypatch, gzip1):
    """Comparing against a missing file reports its path and a /dev/null member."""
    monkeypatch.setattr(Config(), 'new_file', True)

    missing = MissingFile('/nonexisting', gzip1)
    result = gzip1.compare(missing)

    assert result.source2 == '/nonexisting'
    last_detail = result.details[-1]
    assert last_detail.source2 == '/dev/null'