Esempio n. 1
0
def test_has_visuals(monkeypatch, gif3, gif4):
    """With visual diffs enabled, the second GIF detail carries a PNG and a GIF visual."""
    monkeypatch.setattr(Config(), 'compute_visual_diffs', True)
    details = gif3.compare(gif4).details
    assert len(details) == 2
    visuals = details[1].visuals
    assert len(visuals) == 2
    expected_types = ['image/png;base64', 'image/gif;base64']
    assert [visual.data_type for visual in visuals] == expected_types
Esempio n. 2
0
def test_has_visuals(monkeypatch, image1, image2):
    """With visual diffs enabled, the first ICO detail carries a PNG and a GIF visual."""
    monkeypatch.setattr(Config(), 'compute_visual_diffs', True)
    details = image1.compare(image2).details
    assert len(details) == 2
    visuals = details[0].visuals
    assert len(visuals) == 2
    expected_types = ['image/png;base64', 'image/gif;base64']
    assert [visual.data_type for visual in visuals] == expected_types
Esempio n. 3
0
 def compare_details(self, other, source=None):
     """Compare image content (via Img2Txt) and image metadata (via Identify).

     If a content difference was found, visual diffs are enabled in the
     configuration and ``same_size`` accepts both files, a pixel visual and
     a flicker visual are attached to the content difference. A
     ``subprocess.CalledProcessError`` raised while building them is
     ignored, so the textual result is still returned.

     Returns a two-element list: the content result followed by the
     metadata result.
     """
     image_diff = Difference.from_command(
         Img2Txt, self.path, other.path, source="Image content"
     )
     wants_visuals = (
         image_diff is not None
         and Config().compute_visual_diffs
         and same_size(self, other)
     )
     if wants_visuals:
         try:
             logger.debug(
                 "Generating visual difference for %s and %s",
                 self.path,
                 other.path,
             )
             visuals = [
                 pixel_difference(self.path, other.path),
                 flicker_difference(self.path, other.path),
             ]
             image_diff.add_visuals(visuals)
         except subprocess.CalledProcessError:  # noqa
             # Visuals are best-effort only.
             pass
     metadata_diff = Difference.from_command(
         Identify, self.path, other.path, source="Image metadata"
     )
     return [image_diff, metadata_diff]
Esempio n. 4
0
    def compare_details(self, other, source=None):
        """Compare two .ico files by content (after PNG conversion) and metadata.

        Both files are converted with ``ICOImageFile.convert``; if that raises
        ``subprocess.CalledProcessError`` the content comparison is skipped
        entirely. When a content difference exists, visual diffs are enabled
        in the configuration and the images have matching sizes, pixel and
        flicker visuals are attached to it.

        Generating the visuals is best-effort: a
        ``subprocess.CalledProcessError`` from the helpers is swallowed so the
        textual differences are still reported, matching the behaviour of the
        other image comparators.

        Returns a list holding the content result (only when the conversion
        succeeded) followed by the Identify metadata result.
        """
        differences = []

        # img2txt does not support .ico files directly so convert to .PNG.
        try:
            png_a, png_b = [ICOImageFile.convert(x) for x in (self, other)]
        except subprocess.CalledProcessError:  # noqa
            pass
        else:
            content_diff = Difference.from_command(Img2Txt,
                                                   png_a,
                                                   png_b,
                                                   source="Image content")
            if (content_diff is not None and Config().compute_visual_diffs
                    and same_size(self, other)):
                if get_image_size(self.path) == get_image_size(other.path):
                    try:
                        logger.debug(
                            "Generating visual difference for %s and %s",
                            self.path,
                            other.path,
                        )
                        content_diff.add_visuals([
                            pixel_difference(self.path, other.path),
                            flicker_difference(self.path, other.path),
                        ])
                    except subprocess.CalledProcessError:  # noqa
                        # A failing visual helper must not abort the whole
                        # comparison; keep the textual content diff.
                        pass
            differences.append(content_diff)

        differences.append(
            Difference.from_command(Identify,
                                    self.path,
                                    other.path,
                                    source="Image metadata"))

        return differences
Esempio n. 5
0
    def compare_details(self, other, source=None):
        """Compare two files via the Sng command and optionally add visuals.

        The returned list always starts with the Sng result. A second
        "Image content" difference holding a pixel and a flicker visual is
        appended when the Sng result is not None, visual diffs are enabled
        in the configuration and ``same_size`` accepts both files. A
        ``subprocess.CalledProcessError`` during visual generation is ignored.
        """
        text_diff = Difference.from_command(
            Sng, self.path, other.path, source='sng'
        )
        results = [text_diff]

        # Guard clauses, evaluated in the same short-circuit order as a
        # single chained condition would be.
        if text_diff is None:
            return results
        if not Config().compute_visual_diffs:
            return results
        if not same_size(self, other):
            return results

        try:
            logger.debug(
                "Generating visual difference for %s and %s",
                self.path,
                other.path,
            )
            visual_diff = Difference(
                None, self.path, other.path, source="Image content"
            )
            visuals = [
                pixel_difference(self.path, other.path),
                flicker_difference(self.path, other.path),
            ]
            visual_diff.add_visuals(visuals)
            results.append(visual_diff)
        except subprocess.CalledProcessError:  # noqa
            pass

        return results
Esempio n. 6
0
def test_has_visuals(monkeypatch, png1, png2):
    """With visual diffs enabled, the second PNG detail carries a PNG and a GIF visual."""
    monkeypatch.setattr(Config(), 'compute_visual_diffs', True)
    details = png1.compare(png2).details
    assert len(details) == 2
    visuals = details[1].visuals
    assert len(visuals) == 2
    expected_types = ['image/png;base64', 'image/gif;base64']
    assert [visual.data_type for visual in visuals] == expected_types
Esempio n. 7
0
    def __init__(self, print_func, color):
        """Store *color* and wrap *print_func* with the configured text report size limit."""
        size_limit = Config().max_text_report_size
        self.print_func = create_limited_print_func(print_func, size_limit)
        self.color = color

        super().__init__()
Esempio n. 8
0
    def compare_details(self, other, source=None):
        """Compare two files via the Gifbuild command and optionally add visuals.

        The returned list always starts with the Gifbuild result. A second
        "Image content" difference holding a pixel and a flicker visual is
        appended when the Gifbuild result is not None, visual diffs are
        enabled in the configuration and ``can_compose_gif_images`` accepts
        both files. A ``subprocess.CalledProcessError`` during visual
        generation is ignored.
        """
        text_diff = Difference.from_command(
            Gifbuild, self.path, other.path, source="gifbuild"
        )
        results = [text_diff]

        # Guard clauses keep the original short-circuit evaluation order.
        if text_diff is None:
            return results
        if not Config().compute_visual_diffs:
            return results
        if not can_compose_gif_images(self, other):
            return results

        try:
            logger.debug(
                "Generating visual difference for %s and %s",
                self.path,
                other.path,
            )
            visual_diff = Difference(
                None, self.path, other.path, source="Image content"
            )
            visuals = [
                pixel_difference(self.path, other.path),
                flicker_difference(self.path, other.path),
            ]
            visual_diff.add_visuals(visuals)
            results.append(visual_diff)
        except subprocess.CalledProcessError:  # noqa
            pass

        return results
Esempio n. 9
0
def test_too_much_input_for_diff(monkeypatch):
    """Inputs longer than ``max_diff_input_lines`` produce a 'too much input' marker."""
    monkeypatch.setattr(Config(), 'max_diff_input_lines', 20)
    line_count = 21  # one line more than the limit set above
    reader_a = io.StringIO("a\n" * line_count)
    reader_b = io.StringIO("b\n" * line_count)
    difference = Difference.from_text_readers(reader_a, reader_b, 'a', 'b')
    assert '[ Too much input for diff ' in difference.unified_diff
Esempio n. 10
0
    def comparisons(self, other):
        """Yield ``(my_member, other_member, comment)`` triples to compare.

        Members of this container are paired with members of *other* in
        this order:

        1. if each side has exactly one member, those two are paired and
           the generator stops;
        2. members sharing the same name, in this container's order;
        3. pairs proposed by ``self.perform_fuzzy_matching``, with a comment
           stating the score;
        4. only when ``Config().new_file`` is set, every still-unpaired
           member is paired with a ``MissingFile('/dev/null', ...)``.

        Each paired member is removed from the working dicts, and a
        progress step sized by the member sizes is started before each
        yield.
        """
        # Values are (member, size) pairs, as unpacked in prep_yield below.
        my_members = OrderedDict(self.get_adjusted_members_sizes())
        other_members = OrderedDict(other.get_adjusted_members_sizes())
        # Sum of the sizes on both sides; used as the progress total.
        total_size = sum(
            x[1]
            for x in itertools.chain(
                my_members.values(), other_members.values()
            )
        )
        # TODO: progress could be a bit more accurate here, give more weight to fuzzy-hashed files
        # TODO: merge DirectoryContainer.comparisons() into this

        with Progress(total_size) as p:

            def prep_yield(my_name, other_name, comment=NO_COMMENT):
                # Pop both members so later stages only see unpaired ones,
                # then account for their sizes in the progress bar.
                my_member, my_size = my_members.pop(my_name)
                other_member, other_size = other_members.pop(other_name)
                p.begin_step(my_size + other_size, msg=my_member.progress_name)
                return my_member, other_member, comment

            # if both containers contain 1 element, compare these
            if len(my_members) == 1 and len(other_members) == 1:
                yield prep_yield(
                    next(iter(my_members.keys())),
                    next(iter(other_members.keys())),
                )
                return

            other_names = set(other_members.keys())
            # keep it sorted like my_members
            # (materialised as a list first because prep_yield mutates
            # my_members while we iterate)
            both_names = [
                name for name in my_members.keys() if name in other_names
            ]
            for name in both_names:
                yield prep_yield(name, name)

            # Only members left unpaired by the exact-name pass are offered
            # to the fuzzy matcher.
            for my_name, other_name, score in self.perform_fuzzy_matching(
                my_members, other_members
            ):
                comment = (
                    "Files similar despite different names"
                    " (score: {}, lower is more similar)".format(score)
                )
                if score == 0:
                    comment = "Files identical despite different names"
                yield prep_yield(my_name, other_name, comment)

            # Whatever is still unpaired exists on one side only; it is
            # reported against a placeholder only when new_file is enabled.
            if Config().new_file:
                for my_member, my_size in my_members.values():
                    p.begin_step(my_size, msg=my_member.progress_name)
                    yield my_member, MissingFile(
                        '/dev/null', my_member
                    ), NO_COMMENT

                for other_member, other_size in other_members.values():
                    p.begin_step(other_size, msg=other_member.progress_name)
                    yield MissingFile(
                        '/dev/null', other_member
                    ), other_member, NO_COMMENT
Esempio n. 11
0
def filter_excludes(filenames):
    """Yield the entries of *filenames* that match no configured exclude pattern.

    Patterns come from ``Config().excludes`` and are matched
    case-sensitively with ``fnmatch.fnmatchcase``. The first matching
    pattern is logged at debug level and the entry is dropped.
    """
    for candidate in filenames:
        matched_pattern = next(
            (
                pattern
                for pattern in Config().excludes
                if fnmatch.fnmatchcase(candidate, pattern)
            ),
            None,
        )
        if matched_pattern is None:
            yield candidate
        else:
            logger.debug(
                "Excluding %s as it matches pattern '%s'",
                candidate,
                matched_pattern,
            )
Esempio n. 12
0
def command_excluded(command):
    """Return True when *command* matches any regex in ``Config().exclude_commands``.

    The first matching pattern is logged at debug level.
    """
    matched_pattern = next(
        (
            pattern
            for pattern in Config().exclude_commands
            if re.search(pattern, command)
        ),
        None,
    )
    if matched_pattern is None:
        return False
    logger.debug(
        "Excluding command '%s' as it matches pattern '%s'",
        command,
        matched_pattern,
    )
    return True
Esempio n. 13
0
 def output_node_placeholder(self, pagename, lazy_load, size=0):
     """Return the HTML placeholder standing in for a node that is not inlined.

     With *lazy_load* true the lazy-load template is filled with the page
     name, the formatted configured child page size and the formatted
     *size*; otherwise the fixed limit template is returned.
     """
     if not lazy_load:
         return templates.DIFFNODE_LIMIT
     substitutions = {
         "pagename": pagename,
         "pagesize": sizeof_fmt(Config().max_page_size_child),
         "size": sizeof_fmt(size),
     }
     return templates.DIFFNODE_LAZY_LOAD % substitutions
Esempio n. 14
0
def row_was_output():
    """Record that one diff row was printed and rotate output pages if needed.

    Increments the module-level row counter, then:

    * without rotation parameters, raises ``DiffBlockLimitReached`` once the
      row count reaches ``max_diff_block_lines`` and otherwise returns;
    * with rotation parameters, raises ``DiffBlockLimitReached`` once the
      row count reaches ``max_diff_block_lines_html_dir_ratio`` times
      ``max_diff_block_lines``; returns while the current page still has
      room (row count on the parent page, bytes written on a child page);
      otherwise writes the table footer to the current child page (if any)
      and switches printing to a new ``<mainname>-<page>.html`` file.
    """
    global spl_print_func, spl_print_ctrl, spl_rows, spl_current_page
    spl_rows += 1
    # Second element is falsy when no page rotation is configured.
    _, rotation_params = spl_print_ctrl
    max_lines = Config().max_diff_block_lines
    max_lines_parent = Config().max_diff_block_lines_parent
    max_lines_ratio = Config().max_diff_block_lines_html_dir_ratio
    max_report_child_size = Config().max_report_child_size
    if not rotation_params:
        # html-dir single output, don't need to rotate
        if spl_rows >= max_lines:
            raise DiffBlockLimitReached()
        return
    else:
        # html-dir output, perhaps need to rotate
        # (css_url is unpacked but not used in this function)
        directory, mainname, css_url = rotation_params
        if spl_rows >= max_lines_ratio * max_lines:
            raise DiffBlockLimitReached()

        if spl_current_page == 0:  # on parent page
            if spl_rows < max_lines_parent:
                return
        else:  # on child page
            # TODO: make this stay below the max, instead of going 1 row over the max
            # will require some backtracking...
            if spl_print_func.bytes_written < max_report_child_size:
                return

    # Only reached in rotation mode once the current page is full.
    spl_current_page += 1
    filename = "%s-%s.html" % (mainname, spl_current_page)

    if spl_current_page > 1:
        # previous page was a child, close it
        # The footer links to the page we are about to create.
        spl_print_func(templates.UD_TABLE_FOOTER % {
            "filename": html.escape(filename),
            "text": "load diff"
        },
                       force=True)
        spl_print_exit(None, None, None)

    # rotate to the next child page
    context = spl_file_printer(directory, filename)
    spl_print_enter(context, rotation_params)
    spl_print_func(templates.UD_TABLE_HEADER)
Esempio n. 15
0
def output_html_directory(directory,
                          difference,
                          css_url=None,
                          jquery_url=None):
    """
    Multi-file presenter: write the report as ``index.html`` inside
    *directory*, creating the directory when it does not exist.

    Large diff tables are put into files of their own, loaded on demand
    with jQuery. When no *jquery_url* is given, an existing ``jquery.js``
    in the directory is used, or else a symlink to the first existing entry
    of ``JQUERY_SYSTEM_LOCATIONS`` is created (symlinked, so that the result
    can still be shared over HTTP). Passing ``'disable'`` turns the script
    section off.

    Raises ``ValueError`` when *directory* exists but is not a directory.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)

    if not os.path.isdir(directory):
        raise ValueError("%s is not a directory" % directory)

    if not jquery_url:
        local_jquery = os.path.join(directory, "jquery.js")
        if os.path.exists(local_jquery):
            jquery_url = "./jquery.js"
        else:
            # exists() is false but lexists() is true for a dangling symlink
            if os.path.lexists(local_jquery):
                os.unlink(local_jquery)
            system_copy = next(
                (
                    location
                    for location in JQUERY_SYSTEM_LOCATIONS
                    if os.path.exists(location)
                ),
                None,
            )
            if system_copy is not None:
                os.symlink(system_copy, local_jquery)
                jquery_url = "./jquery.js"
            else:
                logger.warning(
                    '--jquery was not specified and jQuery was not found in any known location. Disabling on-demand inline loading.'
                )
                logger.debug('Locations searched: %s',
                             ', '.join(JQUERY_SYSTEM_LOCATIONS))
    if jquery_url == 'disable':
        jquery_url = None

    with file_printer(directory, "index.html") as raw_print:
        print_func = create_limited_print_func(raw_print,
                                               Config().max_report_size)
        try:
            output_header(css_url, print_func)
            output_difference(difference, print_func, css_url, directory, [])
        except PrintLimitReached:
            logger.debug('print limit reached')
            print_func(u'<div class="error">Max output size reached.</div>',
                       force=True)
        if jquery_url:
            scripts = templates.SCRIPTS % {
                'jquery_url': html.escape(jquery_url),
            }
            print_func(scripts, force=True)
        output_footer(print_func)
Esempio n. 16
0
 def compare_details(self, other, source=None):
     """Return a one-element list with the archive listing comparison.

     Returns an empty list when directory metadata is excluded
     recursively. Otherwise MozillaZipinfo, MozillaZipinfoVerbose and
     BsdtarVerbose are tried in that order and the first truthy result is
     used; if none is truthy, the result of the last command is kept.
     """
     if Config().exclude_directory_metadata == 'recursive':
         return []
     listing_diff = None
     for tool in (MozillaZipinfo, MozillaZipinfoVerbose, BsdtarVerbose):
         listing_diff = Difference.from_command(
             tool, self.path, other.path
         )
         if listing_diff:
             break
     return [listing_diff]
Esempio n. 17
0
def list_libarchive(path, ignore_errors=False):
    """Yield one newline-terminated listing line per entry of the archive at *path*.

    When ``Config().exclude_directory_metadata`` is ``'recursive'`` only the
    entry name is emitted (followed by ``-> target`` for symlinks).
    Otherwise each line holds, in order: mode string, link count, owner,
    group, size (or ``major,minor`` for block/character devices),
    modification time rendered via ``time.gmtime`` with microseconds, and
    the name.

    Owner and group are shown as ``name (id)`` when the archive records a
    name, and as the bare numeric id otherwise.

    ``libarchive.exception.ArchiveError`` is re-raised unless
    *ignore_errors* is true, in which case the listing simply ends.
    """
    try:
        with libarchive.file_reader(path) as archive:
            for entry in archive:
                name_and_link = entry.name
                if entry.issym:
                    name_and_link = '{entry.name} -> {entry.linkname}'.format(
                        entry=entry
                    )
                if Config().exclude_directory_metadata == 'recursive':
                    yield '{name_and_link}\n'.format(
                        name_and_link=name_and_link
                    )
                    continue
                if entry.isblk or entry.ischr:
                    size_or_dev = '{major:>3},{minor:>3}'.format(
                        major=entry.rdevmajor, minor=entry.rdevminor
                    )
                else:
                    size_or_dev = entry.size
                mtime = time.strftime(
                    '%Y-%m-%d %H:%M:%S', time.gmtime(entry.mtime)
                ) + '.{:06d}'.format(entry.mtime_nsec // 1000)
                if entry.uname:
                    # Same layout as the group column below; the template
                    # must reference both keyword arguments, otherwise the
                    # owner name and uid are silently dropped.
                    user = '{user:<8} {uid:>7}'.format(
                        user=entry.uname.decode(
                            'utf-8', errors='surrogateescape'
                        ),
                        uid='({})'.format(entry.uid),
                    )
                else:
                    user = entry.uid
                if entry.gname:
                    group = '{group:<8} {gid:>7}'.format(
                        group=entry.gname.decode(
                            'utf-8', errors='surrogateescape'
                        ),
                        gid='({})'.format(entry.gid),
                    )
                else:
                    group = entry.gid
                yield '{strmode} {entry.nlink:>3} {user:>8} {group:>8} {size_or_dev:>8} {mtime:>8} {name_and_link}\n'.format(
                    strmode=entry.strmode.decode('us-ascii'),
                    entry=entry,
                    user=user,
                    group=group,
                    size_or_dev=size_or_dev,
                    mtime=mtime,
                    name_and_link=name_and_link,
                )
    except libarchive.exception.ArchiveError:
        if not ignore_errors:
            raise
Esempio n. 18
0
def filter_excludes(filenames):
    """Return a list of the entries of *filenames* matching no exclude pattern.

    Patterns come from ``Config().excludes`` and are matched
    case-sensitively with ``fnmatch.fnmatchcase``. The first matching
    pattern is logged at debug level and the entry is dropped.
    """
    kept = []

    for candidate in filenames:
        matched_pattern = next(
            (
                pattern
                for pattern in Config().excludes
                if fnmatch.fnmatchcase(candidate, pattern)
            ),
            None,
        )
        if matched_pattern is not None:
            logger.debug(
                "Excluding %s as it matches pattern '%s'",
                candidate,
                matched_pattern,
            )
            continue
        kept.append(candidate)

    return kept
Esempio n. 19
0
def compare_root_paths(path1, path2):
    """Compare the two top-level paths and return the comparison result.

    Unless ``Config().new_file`` is set, ``bail_if_non_existing`` is called
    on both paths first. Returns None when ``any_excluded`` reports an
    excluded path. Two directories are handed to ``compare_directories``;
    anything else is wrapped in specialized ``FilesystemFile`` objects and
    handed to ``compare_files``.
    """
    from ..directory import FilesystemDirectory, FilesystemFile, compare_directories

    def load(path):
        # The file's container is its parent directory.
        parent = FilesystemDirectory(os.path.dirname(path)).as_container
        return specialize(FilesystemFile(path, container=parent))

    if not Config().new_file:
        bail_if_non_existing(path1, path2)
    if any_excluded(path1, path2):
        return None
    if os.path.isdir(path1) and os.path.isdir(path2):
        return compare_directories(path1, path2)
    first = load(path1)
    second = load(path2)
    return compare_files(first, second)
Esempio n. 20
0
def output_html(difference, css_url=None, print_func=None):
    """
    Default presenter: emit the whole report as a single HTML document.

    Output goes through *print_func* (the builtin ``print`` when omitted),
    capped at ``Config().max_report_size``. When the cap is hit an error
    banner is forced out, and the footer is written in either case.
    """
    sink = print if print_func is None else print_func
    limited_print = create_limited_print_func(sink, Config().max_report_size)
    try:
        output_header(css_url, limited_print)
        output_difference(difference, limited_print, css_url, None, [])
    except PrintLimitReached:
        logger.debug('print limit reached')
        limited_print(
            u'<div class="error">Max output size reached.</div>',
            force=True,
        )
    output_footer(limited_print)
Esempio n. 21
0
    def comparisons(self, other):
        """Yield the parent class's comparisons, skipping members with equal md5sums.

        The md5sum tables are read from the enclosing source of each side
        when that side has a source, and default to empty dicts. A pair is
        skipped (and logged at debug level) only when ``force_details`` is
        off, both members share a name and both tables hold the same
        checksum for it.
        """
        my_md5sums = (
            self.source.container.source.container.source.md5sums
            if self.source
            else {}
        )
        other_md5sums = (
            other.source.container.source.container.source.md5sums
            if other.source
            else {}
        )

        for my_member, other_member, comment in super().comparisons(other):
            # The distinct defaults ('my' / 'other') make a name missing from
            # both tables compare as different.
            skip = (
                not Config().force_details
                and my_member.name == other_member.name
                and my_md5sums.get(my_member.name, 'my')
                == other_md5sums.get(other_member.name, 'other')
            )
            if skip:
                logger.debug("Skip %s: identical md5sum", my_member.name)
                continue
            yield my_member, other_member, comment
Esempio n. 22
0
 def compare_details(self, other, source=None):
     """Return the non-None archive listing and Zipnote differences.

     Unless directory metadata is excluded recursively, Zipinfo,
     ZipinfoVerbose and BsdtarVerbose are tried in that order and the first
     truthy result is used (if none is truthy, the last result is kept).
     The Zipnote comparison is always run.
     """
     listing_diff = None
     if Config().exclude_directory_metadata != 'recursive':
         for tool in (Zipinfo, ZipinfoVerbose, BsdtarVerbose):
             listing_diff = Difference.from_command(
                 tool, self.path, other.path
             )
             if listing_diff:
                 break
     note_diff = Difference.from_command(Zipnote, self.path, other.path)
     return [
         found for found in (listing_diff, note_diff) if found is not None
     ]
Esempio n. 23
0
    def _compare_using_details(self, other, source):
        """Build a Difference from detail comparisons, or return None if there are none.

        Details are gathered from ``compare_details`` (when this object
        defines it) and, for containers, from the optional file-type
        comparisons and the container comparison. Once the container depth
        reaches ``Config().max_container_depth`` a comment is added and the
        container comparison is told not to recurse. Falsy details are
        dropped before attaching.
        """
        difference = Difference(None, self.name, other.name, source=source)
        details = []

        if hasattr(self, 'compare_details'):
            details += self.compare_details(other, source)

        if self.as_container:
            if self.as_container.auto_diff_metadata:
                type_from_file = Difference.from_text(
                    self.magic_file_type,
                    other.magic_file_type,
                    self,
                    other,
                    source='filetype from file(1)',
                )
                type_from_class = Difference.from_text(
                    self.__class__.__name__,
                    other.__class__.__name__,
                    self,
                    other,
                    source='filetype from diffoscope',
                )
                details.extend([type_from_file, type_from_class])

            # Don't recurse forever on archive quines, etc.
            depth = self._as_container.depth
            no_recurse = depth >= Config().max_container_depth
            if no_recurse:
                msg = "Reached max container depth ({})".format(depth)
                logger.debug(msg)
                difference.add_comment(msg)
            nested = self.as_container.compare(
                other.as_container, no_recurse=no_recurse
            )
            details.extend(nested)

        details = list(filter(None, details))
        if not details:
            return None

        difference.add_details(details)
        return difference
Esempio n. 24
0
def compare_meta(path1, path2):
    """Return the list of metadata differences between two filesystem paths.

    Returns an empty list when directory metadata is excluded ('yes' or
    'recursive') or when either path does not exist. Otherwise the Stat
    comparison is run; if either path is a symlink only that result is
    returned. For other paths the Getfacl, lsattr and xattr comparisons
    follow. A missing external tool is logged and skipped, and None results
    are dropped.
    """
    if Config().exclude_directory_metadata in ('yes', 'recursive'):
        logger.debug(
            "Excluding directory metadata for paths (%s, %s)", path1, path2
        )
        return []

    logger.debug('compare_meta(%s, %s)', path1, path2)
    collected = []

    def found():
        return [entry for entry in collected if entry is not None]

    # Don't run any commands if any of the paths do not exist
    if not (os.path.exists(path1) and os.path.exists(path2)):
        return collected

    try:
        collected.append(Difference.from_command(Stat, path1, path2))
    except RequiredToolNotFound:
        logger.error("Unable to find 'stat'! Is PATH wrong?")

    if os.path.islink(path1) or os.path.islink(path2):
        return found()

    try:
        collected.append(Difference.from_command(Getfacl, path1, path2))
    except RequiredToolNotFound:
        logger.info(
            "Unable to find 'getfacl', some directory metadata differences might not be noticed."
        )

    try:
        attrs_a = lsattr(path1)
        attrs_b = lsattr(path2)
        collected.append(
            Difference.from_text(
                attrs_a, attrs_b, path1, path2, source='lsattr'
            )
        )
    except RequiredToolNotFound:
        logger.info(
            "Unable to find 'lsattr', some directory metadata differences might not be noticed."
        )

    collected.append(xattr(path1, path2))
    return found()
Esempio n. 25
0
def compare_files(file1, file2, source=None, diff_content_only=False):
    """Compare two file objects and return a Difference, or None.

    Returns None when either name is excluded, or when the contents are the
    same and ``Config().force_details`` is off. With *diff_content_only*,
    returns None for same content and a bare "Files differ" Difference
    otherwise. In all other cases both files are specialized. Files of
    different classes where at least one is not a container are compared
    as bytes; everything else goes through ``file1.compare``.
    """
    logger.debug(
        "Comparing %s (%s) and %s (%s)",
        file1.name,
        file1.__class__.__name__,
        file2.name,
        file2.__class__.__name__,
    )

    if any_excluded(file1.name, file2.name):
        return None

    force_details = Config().force_details
    with profile('has_same_content_as', file1):
        has_same_content = file1.has_same_content_as(file2)

    if has_same_content and not force_details:
        logger.debug(
            "has_same_content_as returned True; skipping further comparisons"
        )
        return None

    if diff_content_only:
        if has_same_content:
            return None
        return Difference(None, file1.name, file2.name, comment="Files differ")

    specialize(file1)
    specialize(file2)

    if isinstance(file1, MissingFile):
        file1.other_file = file2
    elif isinstance(file2, MissingFile):
        file2.other_file = file1
    else:
        if file1.__class__.__name__ != file2.__class__.__name__:
            if file1.as_container is None or file2.as_container is None:
                return file1.compare_bytes(file2, source)

    with profile('compare_files (cumulative)', file1):
        return file1.compare(file2, source)
Esempio n. 26
0
    def _compare_using_details(self, other, source):
        """Build a Difference from detail comparisons, or return None if there are none.

        Details are gathered from ``compare_details`` (when this object
        defines it) and, for containers, from the container comparison.
        Once the container depth reaches ``Config().max_container_depth`` a
        comment is added and the container comparison is told not to
        recurse. Falsy details are dropped before attaching.
        """
        difference = Difference(None, self.name, other.name, source=source)
        details = []

        if hasattr(self, 'compare_details'):
            details += self.compare_details(other, source)

        if self.as_container:
            # Don't recurse forever on archive quines, etc.
            depth = self._as_container.depth
            no_recurse = (depth >= Config().max_container_depth)
            if no_recurse:
                msg = "Reached max container depth ({})".format(depth)
                logger.debug(msg)
                difference.add_comment(msg)
            nested = self.as_container.compare(
                other.as_container, no_recurse=no_recurse
            )
            details.extend(nested)

        details = list(filter(None, details))
        if not details:
            return None

        difference.add_details(details)
        return difference
Esempio n. 27
0
        def process_node(node, score):
            """Render one difference node and decide which page receives it.

            ``score[3]`` is used as the path leading to *node*. The node's
            output is either merged into its ancestor's page (when both the
            report and page limits allow it), replaced there by a
            placeholder, or framed as a page of its own. Children of *node*
            are then registered against whichever page stored it.

            Returns True when the node's children should not be traversed
            (a limit was hit but other pages still have holes), raises
            StopIteration when no further nodes should be traversed, and
            returns None otherwise.
            """
            path = score[3]
            diff_path = output_diff_path(path)
            pagename = md5(diff_path)
            logger.debug('html output for %s', diff_path)

            # The page that scheduled this node, or None for the root.
            ancestor = ancestors.pop(node, None)
            assert ancestor in path or (ancestor is None
                                        and node is root_difference)
            node_output, node_continuation = output_node(
                ctx, node, path, "  ",
                len(path) - 1)

            add_to_existing = False
            if ancestor:
                # The root page and child pages have separate size limits.
                page_limit = (Config().max_page_size
                              if ancestor is root_difference else
                              Config().max_page_size_child)
                page_current = outputs[ancestor].size(placeholder_len)
                # Already-printed bytes plus everything still pending.
                report_current = self.report_printed + sum(
                    p.size(placeholder_len) for p in outputs.values())
                want_to_add = node_output.size(placeholder_len)
                logger.debug(
                    "report size: %s/%s, page size: %s/%s, want to add %s)",
                    report_current,
                    self.report_limit,
                    page_current,
                    page_limit,
                    want_to_add,
                )
                # make_new_subpage is only assigned on the two branches
                # that leave add_to_existing False; it is only read on
                # that same path below.
                if report_current + want_to_add > self.report_limit:
                    make_new_subpage = False
                elif page_current + want_to_add < page_limit:
                    add_to_existing = True
                else:
                    make_new_subpage = not ctx.single_page

            if add_to_existing:
                # under limit, add it to an existing page
                outputs[ancestor] = outputs[ancestor].pformat(
                    {node: node_output})
                stored = ancestor

            else:
                # over limit (or root), new subpage or continue/break
                if ancestor:
                    placeholder = self.output_node_placeholder(
                        pagename, make_new_subpage, node.size())
                    outputs[ancestor] = outputs[ancestor].pformat(
                        {node: placeholder})
                    self.maybe_print(ancestor, printers, outputs,
                                     continuations)
                    footer = output_footer()
                    # we hit a limit, either max-report-size or single-page
                    if not make_new_subpage:
                        if outputs:
                            # True = don't traverse this node's children,
                            # because they won't be output however there are
                            # holes in other pages, so don't break the loop
                            # just yet
                            return True
                        # No more holes, don't traverse any more nodes
                        # NOTE(review): used as a stop signal to the caller;
                        # if the caller is itself a generator, PEP 479 would
                        # turn this into RuntimeError - confirm the caller
                        # catches it directly.
                        raise StopIteration
                else:
                    # Unconditionally write the root node regardless of limits
                    assert node is root_difference
                    footer = output_footer(ctx.jquery_url)
                    pagename = "index"

                # Wrap the node in a full page: header, container, footer.
                outputs[node] = node_output.frame(
                    output_header(ctx.css_url, ctx.our_css_url, ctx.icon_url) +
                    u'<div class="difference">\n',
                    u'</div>\n' + footer,
                )
                assert not ctx.single_page or node is root_difference
                # Printer spec: a callable followed by its arguments.
                printers[node] = ((make_printer,
                                   ctx.target) if ctx.single_page else
                                  (file_printer, ctx.target,
                                   "%s.html" % pagename))
                stored = node

            # Children will be looked up against the page that holds node.
            for child in node.details:
                logger.debug(
                    "scheduling future html output for: %s",
                    output_diff_path(path + [child]),
                )
                ancestors[child] = stored

            conts = continuations.setdefault(stored, [])
            if node_continuation:
                conts.append(node_continuation)

            self.maybe_print(stored, printers, outputs, continuations)
Esempio n. 28
0
 def reset(self):
     """Re-read the report size limit from the configuration and zero the printed counter."""
     self.report_limit = Config().max_report_size
     self.report_printed = 0
Esempio n. 29
0
 def __init__(self):
     """Snapshot the diff-block and child-page limits from the configuration."""
     config = Config()
     self.max_lines = config.max_diff_block_lines  # only for html-dir
     self.max_lines_parent = config.max_page_diff_block_lines
     self.max_page_size_child = config.max_page_size_child
Esempio n. 30
0
def test_compare_non_existing(monkeypatch, gzip1):
    """With ``new_file`` on, comparing against a missing file reports '/dev/null' in the last detail."""
    monkeypatch.setattr(Config(), 'new_file', True)
    missing = MissingFile('/nonexisting', gzip1)
    difference = gzip1.compare(missing)
    assert difference.source2 == '/nonexisting'
    last_detail = difference.details[-1]
    assert last_detail.source2 == '/dev/null'