Example #1
0
 def extract(self, member_name, dest_dir):
     """Extract ``member_name`` from the squashfs image below ``dest_dir``.

     Raises ValueError when one of the '/'-separated components of
     ``member_name`` is '..', and subprocess.CalledProcessError when
     unsquashfs exits with a non-zero status.  Returns ``dest_dir`` and
     ``member_name`` glued together by plain string concatenation.
     """
     if '..' in member_name.split('/'):
         raise ValueError('relative path in squashfs')
     cmd = ['unsquashfs', '-n', '-f', '-d', dest_dir, self.source.path, member_name]
     logger.debug("unsquashfs %s into %s", member_name, dest_dir)
     # The output is never read, so discard it instead of piping it: an
     # unread PIPE blocks the child once the OS pipe buffer is full.
     subprocess.check_call(cmd, shell=False, stdout=subprocess.DEVNULL)
     # NOTE(review): no separator is inserted here; presumably member_name
     # starts with '/' -- confirm against the caller.
     return '%s%s' % (dest_dir, member_name)
Example #2
0
def perform_fuzzy_matching(members1, members2):
    """Yield ``(name1, name2, score)`` for members whose fuzzy hashes are close.

    For every non-directory entry of ``members1`` that has a fuzzy hash, the
    entry of ``members2`` with the lowest ``tlsh.diff`` score is picked.  The
    pair is yielded when that score is below Config.general.fuzzy_threshold,
    and the matched ``members2`` name is not offered again afterwards.
    Nothing is yielded when ``tlsh`` is None or the threshold is 0.
    """
    if tlsh is None or Config.general.fuzzy_threshold == 0:
        return
    already_compared = set()
    # Perform local copies because they will be modified by consumer
    members1 = dict(members1)
    members2 = dict(members2)
    for name1, file1 in members1.items():
        if file1.is_directory() or not file1.fuzzy_hash:
            continue
        comparisons = [
            (tlsh.diff(file1.fuzzy_hash, file2.fuzzy_hash), name2)
            for name2, file2 in members2.items()
            if name2 not in already_compared
            and not file2.is_directory()
            and file2.fuzzy_hash
        ]
        if not comparisons:
            continue
        # Only the best candidate is needed; min() returns the first entry
        # with the lowest score, exactly what a stable sort would put first.
        score, name2 = min(comparisons, key=operator.itemgetter(0))
        logger.debug('fuzzy top match %s %s: %d difference score', name1,
                     name2, score)
        if score < Config.general.fuzzy_threshold:
            yield name1, name2, score
            already_compared.add(name2)
Example #3
0
def output_difference(difference, print_func, css_url, directory, parents):
    """Write the HTML for ``difference`` and, recursively, for its details.

    ``parents`` lists the source names of the enclosing differences; it is
    extended with ``difference.source1`` to build the anchor and handed to
    the nested calls.  ``css_url`` and ``directory`` are only forwarded to
    ``output_unified_diff`` and to the recursive calls.  PrintLimitReached
    is logged and re-raised; the outer <div> is closed in every case.
    """
    logger.debug('html output for %s', difference.source1)
    sources = parents + [difference.source1]
    print_func(u"<div class='difference'>")
    try:
        print_func(u"<div class='diffheader'>")
        if difference.source1 == difference.source2:
            # The span must be closed here; it used to be opened twice.
            print_func(u"<div><span class='source'>%s</span>"
                       % escape(difference.source1))
        else:
            print_func(u"<div><span class='source'>%s</span> vs.</div>"
                       % escape(difference.source1))
            print_func(u"<div><span class='source'>%s</span>"
                       % escape(difference.source2))
        # The anchor is built from source names (the first one is left out),
        # so it is escaped like them before being written into the page.
        anchor = escape('/'.join(sources[1:]))
        print_func(u" <a class='anchor' href='#%s' name='%s'>\xb6</a>" % (anchor, anchor))
        print_func(u"</div>")
        if difference.comments:
            print_func(u"<div class='comment'>%s</div>"
                       % u'<br />'.join(map(escape, difference.comments)))
        print_func(u"</div>")
        if difference.unified_diff:
            output_unified_diff(print_func, css_url, directory, difference.unified_diff)
        for detail in difference.details:
            output_difference(detail, print_func, css_url, directory, sources)
    except PrintLimitReached:
        logger.debug('print limit reached')
        raise
    finally:
        # force=True so the closing tag is written even past the print limit
        print_func(u"</div>", force=True)
Example #4
0
 def path(self):
     """Return the path of this member, extracting it on first access.

     The extraction goes into a fresh temporary directory which is kept on
     ``self._temp_dir``; later calls return the remembered path.
     """
     if self._path is not None:
         return self._path
     logger.debug('unpacking %s', self._name)
     assert self._temp_dir is None
     self._temp_dir = get_temporary_directory()
     target_dir = self._temp_dir.name
     self._path = self.container.extract(self._name, target_dir)
     return self._path
Example #5
0
 def wait(self):
     """Join the helper threads, wait for the process and return its exit code."""
     feeder = self._stdin_feeder
     if feeder:
         feeder.join()
     self._stderr_reader.join()
     exit_code = self._process.wait()
     logger.debug('done with %s. exit code %d', self.cmdline()[0], exit_code)
     return exit_code
Example #6
0
def run_diff(fd1, fd2, end_nl_q1, end_nl_q2):
    """Run ``diff -aU7`` on two file descriptors and return the parsed result.

    ``fd1`` and ``fd2`` are handed to the child through /dev/fd and closed in
    this process once the child has been started.  ``end_nl_q1`` and
    ``end_nl_q2`` are passed to DiffParser unchanged.

    Returns None when diff exits with status 0, ``parser.diff`` otherwise.
    Raises subprocess.CalledProcessError when the parser did not succeed and
    the exit status is neither 0 nor 1.
    """
    cmd = ['diff', '-aU7', '/dev/fd/%d' % fd1, '/dev/fd/%d' % fd2]
    logger.debug('running %s', cmd)
    if hasattr(os, 'set_inheritable'): # new in Python 3.4
        os.set_inheritable(fd1, True)
        os.set_inheritable(fd2, True)
    p = subprocess.Popen(cmd, shell=False, bufsize=1,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         pass_fds=(fd1, fd2))
    p.stdin.close()
    os.close(fd1)
    os.close(fd2)
    parser = DiffParser(p.stdout, end_nl_q1, end_nl_q2)
    t_read = Thread(target=parser.parse)
    t_read.daemon = True
    t_read.start()
    t_read.join()
    p.wait()
    logger.debug('done with diff, returncode %d, parsed %s', p.returncode, parser.success)
    if not parser.success and p.returncode not in (0, 1):
        # Attach what the parser collected; the bare name ``diff`` used here
        # before is not a local of this function.
        raise subprocess.CalledProcessError(p.returncode, cmd, output=parser.diff)
    if p.returncode == 0:
        return None
    return parser.diff
Example #7
0
 def wait(self):
     """Join the helper threads, wait for the process and return its exit code."""
     stdin_thread = self._stdin_feeder
     if stdin_thread:
         stdin_thread.join()
     self._stderr_reader.join()
     status = self._process.wait()
     logger.debug('done with %s. exit code %d', self.cmdline()[0], status)
     return status
Example #8
0
def compare_meta(path1, path2):
    """Compare the metadata of ``path1`` and ``path2``.

    Collects the results of the Stat command, of ``lsattr`` and of the
    Getfacl command and returns those that are not None.  When either path
    is a symbolic link only the Stat result is considered.  A missing tool
    is logged and its comparison skipped.
    """
    logger.debug('compare_meta(%s, %s)', path1, path2)
    differences = []
    try:
        differences.append(Difference.from_command(Stat, path1, path2))
    except RequiredToolNotFound:
        # Logger.warn() is a deprecated alias of Logger.warning()
        logger.warning("'stat' not found! Is PATH wrong?")
    if os.path.islink(path1) or os.path.islink(path2):
        return [d for d in differences if d is not None]
    try:
        lsattr1 = lsattr(path1)
        lsattr2 = lsattr(path2)
        # NOTE(review): "lattr" looks like a typo for "lsattr"; left as is
        # because the label may be relied upon elsewhere -- confirm.
        differences.append(
            Difference.from_text(lsattr1,
                                 lsattr2,
                                 path1,
                                 path2,
                                 source="lattr"))
    except RequiredToolNotFound:
        logger.info("Unable to find 'lsattr'.")
    try:
        differences.append(Difference.from_command(Getfacl, path1, path2))
    except RequiredToolNotFound:
        logger.info("Unable to find 'getfacl'.")
    return [d for d in differences if d is not None]
Example #9
0
def output_difference(difference, print_func, css_url, directory, parents):
    """Write the HTML for ``difference`` and, recursively, for its details.

    ``parents`` lists the source names of the enclosing differences; it is
    extended with ``difference.source1`` to build the anchor and handed to
    the nested calls.  ``css_url`` and ``directory`` are only forwarded to
    ``output_unified_diff`` and to the recursive calls.  PrintLimitReached
    is logged and re-raised; the outer <div> is closed in every case.
    """
    logger.debug('html output for %s', difference.source1)
    sources = parents + [difference.source1]
    print_func(u"<div class='difference'>")
    try:
        print_func(u"<div class='diffheader'>")
        if difference.source1 == difference.source2:
            # The span must be closed here; it used to be opened twice.
            print_func(u"<div><span class='source'>%s</span>" %
                       escape(difference.source1))
        else:
            print_func(u"<div><span class='source'>%s</span> vs.</div>" %
                       escape(difference.source1))
            print_func(u"<div><span class='source'>%s</span>" %
                       escape(difference.source2))
        # The anchor is built from source names (the first one is left out),
        # so it is escaped like them before being written into the page.
        anchor = escape('/'.join(sources[1:]))
        print_func(u" <a class='anchor' href='#%s' name='%s'>\xb6</a>" %
                   (anchor, anchor))
        print_func(u"</div>")
        if difference.comments:
            print_func(u"<div class='comment'>%s</div>" %
                       u'<br />'.join(map(escape, difference.comments)))
        print_func(u"</div>")
        if difference.unified_diff:
            output_unified_diff(print_func, css_url, directory,
                                difference.unified_diff)
        for detail in difference.details:
            output_difference(detail, print_func, css_url, directory, sources)
    except PrintLimitReached:
        logger.debug('print limit reached')
        raise
    finally:
        # force=True so the closing tag is written even past the print limit
        print_func(u"</div>", force=True)
Example #10
0
def run_diff(fd1, fd2, end_nl_q1, end_nl_q2):
    """Run ``diff -aU7`` on two file descriptors and return the parsed result.

    ``fd1`` and ``fd2`` are handed to the child through /dev/fd and closed in
    this process once the child has been started.  ``end_nl_q1`` and
    ``end_nl_q2`` are passed to DiffParser unchanged.

    Returns None when diff exits with status 0, ``parser.diff`` otherwise.
    Raises subprocess.CalledProcessError when the parser did not succeed and
    the exit status is neither 0 nor 1.
    """
    cmd = ['diff', '-aU7', '/dev/fd/%d' % fd1, '/dev/fd/%d' % fd2]
    logger.debug('running %s', cmd)
    if hasattr(os, 'set_inheritable'):  # new in Python 3.4
        os.set_inheritable(fd1, True)
        os.set_inheritable(fd2, True)
    p = subprocess.Popen(cmd,
                         shell=False,
                         bufsize=1,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         pass_fds=(fd1, fd2))
    p.stdin.close()
    os.close(fd1)
    os.close(fd2)
    parser = DiffParser(p.stdout, end_nl_q1, end_nl_q2)
    t_read = Thread(target=parser.parse)
    t_read.daemon = True
    t_read.start()
    t_read.join()
    p.wait()
    logger.debug('done with diff, returncode %d, parsed %s', p.returncode,
                 parser.success)
    if not parser.success and p.returncode not in (0, 1):
        # Attach what the parser collected; the bare name ``diff`` used here
        # before is not a local of this function.
        raise subprocess.CalledProcessError(p.returncode,
                                            cmd,
                                            output=parser.diff)
    if p.returncode == 0:
        return None
    return parser.diff
Example #11
0
 def recognizes(file):
     """Return True when ``file`` appears to contain a valid CBFS header.

     Files smaller than CBFS_HEADER_SIZE or larger than
     CBFS_MAXIMUM_FILE_SIZE are rejected without being opened.  Otherwise the
     last four bytes are read as a little-endian signed offset; when it is
     negative and points inside the file, the bytes found there are checked
     with ``is_header_valid``.  If that check fails, files whose name does
     not end in '.rom' are rejected.  '.rom' files, and files whose trailing
     offset was out of range, are scanned from the start instead.
     """
     size = os.stat(file.path).st_size
     if size < CBFS_HEADER_SIZE or size > CBFS_MAXIMUM_FILE_SIZE:
         return False
     with open(file.path, 'rb') as f:
         # peek at the last four bytes as they should contain the relative offset of the header
         f.seek(-4, io.SEEK_END)
         # <pgeorgi> given the hardware we support so far, it looks like
         #           that field is now bound to be little endian
         #   -- #coreboot, 2015-10-14
         rel_offset = struct.unpack('<i', f.read(4))[0]
         # rel_offset is relative to the end of the file, hence negative
         if rel_offset < 0 and -rel_offset > CBFS_HEADER_SIZE and -rel_offset < size:
             f.seek(rel_offset, io.SEEK_END)
             logger.debug('looking for header at offset: %x', f.tell())
             if is_header_valid(f.read(CBFS_HEADER_SIZE), size):
                 return True
             elif not file.name.endswith('.rom'):
                 return False
             else:
                 logger.debug('CBFS relative offset seems wrong, scanning whole image')
         # Fallback: scan the whole file, one candidate offset at a time.
         f.seek(0, io.SEEK_SET)
         offset = 0
         buf = f.read(CBFS_HEADER_SIZE)
         while len(buf) >= CBFS_HEADER_SIZE:
             if is_header_valid(buf, size, offset):
                 return True
             # Once no further full header fits after ``offset``, move on to
             # the next 32768-byte chunk and restart at its beginning.
             # NOTE(review): a header straddling two chunks does not appear
             # to be examined -- confirm whether that matters.
             if len(buf) - offset <= CBFS_HEADER_SIZE:
                 buf = f.read(32768)
                 offset = 0
             else:
                 offset += 1
         return False
Example #12
0
 def recognizes(file):
     """Return True when ``file`` appears to contain a valid CBFS header.

     Files smaller than CBFS_HEADER_SIZE or larger than
     CBFS_MAXIMUM_FILE_SIZE are rejected without being opened.  Otherwise the
     last four bytes are read as a little-endian signed offset; when it is
     negative and points inside the file, the bytes found there are checked
     with ``is_header_valid``.  If that check fails, files whose name does
     not end in '.rom' are rejected.  '.rom' files, and files whose trailing
     offset was out of range, are scanned from the start instead.
     """
     size = os.stat(file.path).st_size
     if size < CBFS_HEADER_SIZE or size > CBFS_MAXIMUM_FILE_SIZE:
         return False
     with open(file.path, 'rb') as f:
         # peek at the last four bytes as they should contain the relative offset of the header
         f.seek(-4, io.SEEK_END)
         # <pgeorgi> given the hardware we support so far, it looks like
         #           that field is now bound to be little endian
         #   -- #coreboot, 2015-10-14
         rel_offset = struct.unpack('<i', f.read(4))[0]
         # rel_offset is relative to the end of the file, hence negative
         if rel_offset < 0 and -rel_offset > CBFS_HEADER_SIZE and -rel_offset < size:
             f.seek(rel_offset, io.SEEK_END)
             logger.debug('looking for header at offset: %x', f.tell())
             if is_header_valid(f.read(CBFS_HEADER_SIZE), size):
                 return True
             elif not file.name.endswith('.rom'):
                 return False
             else:
                 logger.debug(
                     'CBFS relative offset seems wrong, scanning whole image'
                 )
         # Fallback: scan the whole file, one candidate offset at a time.
         f.seek(0, io.SEEK_SET)
         offset = 0
         buf = f.read(CBFS_HEADER_SIZE)
         while len(buf) >= CBFS_HEADER_SIZE:
             if is_header_valid(buf, size, offset):
                 return True
             # Once no further full header fits after ``offset``, move on to
             # the next 32768-byte chunk and restart at its beginning.
             # NOTE(review): a header straddling two chunks does not appear
             # to be examined -- confirm whether that matters.
             if len(buf) - offset <= CBFS_HEADER_SIZE:
                 buf = f.read(32768)
                 offset = 0
             else:
                 offset += 1
         return False
Example #13
0
 def extract(self, member_name, dest_dir):
     """Extract ``member_name`` from the squashfs image below ``dest_dir``.

     Raises ValueError when one of the '/'-separated components of
     ``member_name`` is '..', and subprocess.CalledProcessError when
     unsquashfs exits with a non-zero status.  Returns ``dest_dir`` and
     ``member_name`` glued together by plain string concatenation.
     """
     if '..' in member_name.split('/'):
         raise ValueError('relative path in squashfs')
     cmd = ['unsquashfs', '-n', '-f', '-d', dest_dir, self.source.path, member_name]
     logger.debug("unsquashfs %s into %s", member_name, dest_dir)
     # The output is never read, so discard it instead of piping it: an
     # unread PIPE blocks the child once the OS pipe buffer is full.
     subprocess.check_call(cmd, shell=False, stdout=subprocess.DEVNULL)
     # NOTE(review): no separator is inserted here; presumably member_name
     # starts with '/' -- confirm against the caller.
     return '%s%s' % (dest_dir, member_name)
Example #14
0
 def path(self):
     """Return the path of this member, extracting it on first access.

     The extraction goes into a fresh temporary directory which is kept on
     ``self._temp_dir``; later calls return the remembered path.
     """
     already_unpacked = self._path is not None
     if not already_unpacked:
         logger.debug('unpacking %s', self._name)
         assert self._temp_dir is None
         self._temp_dir = get_temporary_directory()
         self._path = self.container.extract(
             self._name, self._temp_dir.name)
     return self._path
Example #15
0
 def extract(self, member_name, dest_dir):
     """Decompress the gzip source to ``dest_dir``/``member_name``.

     Returns the path that was written.  subprocess.CalledProcessError is
     raised when gzip exits with a non-zero status.
     """
     target = os.path.join(dest_dir, member_name)
     logger.debug('gzip extracting to %s', target)
     with open(target, 'wb') as sink:
         gunzip = ["gzip", "--decompress", "--stdout", self.source.path]
         subprocess.check_call(gunzip, shell=False, stdout=sink, stderr=None)
     return target
Example #16
0
 def extract(self, member_name, dest_dir):
     """Convert the dex source with enjarify into ``dest_dir``/``member_name``.

     Returns the path handed to enjarify's ``-o`` option.
     subprocess.CalledProcessError is raised when enjarify exits with a
     non-zero status.
     """
     dest_path = os.path.join(dest_dir, member_name)
     logger.debug('dex extracting to %s', dest_path)
     # The output is never read, so discard it instead of piping it: an
     # unread PIPE blocks the child once the OS pipe buffer is full.
     subprocess.check_call(['enjarify', '-o', dest_path, self.source.path],
                           shell=False,
                           stderr=None,
                           stdout=subprocess.DEVNULL)
     return dest_path
Example #17
0
 def extract(self, member_name, dest_dir):
     """Decompress the xz source to ``dest_dir``/``member_name``.

     Returns the path that was written.  subprocess.CalledProcessError is
     raised when xz exits with a non-zero status.
     """
     target = os.path.join(dest_dir, member_name)
     logger.debug('xz extracting to %s', target)
     with open(target, 'wb') as sink:
         unxz = ["xz", "--decompress", "--stdout", self.source.path]
         subprocess.check_call(unxz, shell=False, stdout=sink, stderr=None)
     return target
Example #18
0
 def md5sums(self):
     """Return the parsed ``./md5sums`` of the control archive, cached.

     An empty dict is stored and returned when no md5sums file is found.
     """
     if hasattr(self, '_md5sums'):
         return self._md5sums
     control = self.as_container.control_tar.as_container
     md5sums_file = control.lookup_file('./md5sums')
     if not md5sums_file:
         logger.debug('Unable to find a md5sums file')
         self._md5sums = {}
     else:
         self._md5sums = md5sums_file.parse()
     return self._md5sums
Example #19
0
 def get_reverse(self):
     """Return a new Difference with both sources swapped.

     The unified diff, when there is one, goes through
     ``reverse_unified_diff``; comments are reused and every detail is
     reversed recursively.
     """
     forward = self._unified_diff
     backward = None if forward is None else reverse_unified_diff(forward)
     logger.debug('reverse orig %s %s', self._source1, self._source2)
     swapped_sources = [self._source2, self._source1]
     flipped = Difference(backward, None, None, source=swapped_sources, comment=self._comments)
     flipped.add_details([child.get_reverse() for child in self._details])
     return flipped
Example #20
0
 def as_container(self):
     """Return a container object wrapping this file, or None.

     Classes defining CONTAINER_CLASS get one cached instance of it.  Other
     classes borrow the CONTAINER_CLASS of ``self._other_file`` when that
     attribute exists (a new instance per call) and yield None otherwise.
     """
     cls = self.__class__
     if not hasattr(cls, 'CONTAINER_CLASS'):
         if not hasattr(self, '_other_file'):
             return None
         return self._other_file.__class__.CONTAINER_CLASS(self)
     if not hasattr(self, '_as_container'):
         logger.debug('instanciating %s for %s', cls.CONTAINER_CLASS, self)
         self._as_container = cls.CONTAINER_CLASS(self)
     cached = self._as_container
     logger.debug('returning a %s for %s', cached.__class__, self)
     return cached
Example #21
0
def _should_skip_section(name, type):
    """Tell whether the section ``name`` of kind ``type`` is to be skipped.

    A section is skipped when one of READELF_COMMANDS claims it, or when its
    name starts with '.debug' or '.zdebug' without ending in '_str'.
    """
    for command in READELF_COMMANDS:
        if not command.should_skip_section(name, type):
            continue
        logger.debug('skipping section %s, covered by %s', name, command)
        return True
    if not name.startswith(('.debug', '.zdebug')):
        return False
    # section .debug_str looks much nicer with `readelf --string-dump`
    # the rest is handled by READELF_DEBUG_DUMP_COMMANDS
    return not name.endswith('_str')
Example #22
0
 def extract(self, member_name, dest_dir):
     """Inflate the compressed payload of the source into ``dest_dir``/``member_name``.

     Everything from RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET onwards is treated
     as a raw deflate stream.  Returns the path that was written.
     """
     target = os.path.join(dest_dir, member_name)
     logger.debug('rust-object extracting to %s', target)
     # See librustc_trans/back/link.rs for details of this format
     with open(target, 'wb') as sink, open(self.source.path, 'rb') as source:
         payload = source.read()[RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET:]
         # decompressobj() ignores the (non-existent) checksum; a zlib.decompress() would error
         inflater = zlib.decompressobj()
         sink.write(inflater.decompress(ZLIB_DEFAULT_COMPRESSION + payload))
     return target
Example #23
0
def _should_skip_section(name, type):
    """Tell whether the section ``name`` of kind ``type`` is to be skipped.

    A section is skipped when one of READELF_COMMANDS claims it, or when its
    name starts with '.debug' or '.zdebug' without ending in '_str'.
    """
    for handler in READELF_COMMANDS:
        if handler.should_skip_section(name, type):
            logger.debug('skipping section %s, covered by %s', name, handler)
            return True
    is_debug_section = name.startswith(('.debug', '.zdebug'))
    if is_debug_section:
        # section .debug_str looks much nicer with `readelf --string-dump`
        # the rest is handled by READELF_DEBUG_DUMP_COMMANDS
        return not name.endswith('_str')
    return False
Example #24
0
def get_build_id(path):
    """Return the Build ID that ``readelf --notes`` prints for ``path``.

    Returns None when readelf exits with an error or when its output has no
    Build ID line.
    """
    try:
        output = subprocess.check_output(['readelf', '--notes', path])
    except subprocess.CalledProcessError as e:
        logger.debug('Unable to get Build Id for %s: %s', path, e)
        return None
    # Bytes that are not valid UTF-8 elsewhere in the output must not abort
    # the lookup; the Build ID itself only consists of hexadecimal digits.
    m = re.search(r'^\s+Build ID: ([0-9a-f]+)$', output.decode('utf-8', errors='replace'), flags=re.MULTILINE)
    if not m:
        return None
    return m.group(1)
Example #25
0
 def md5sums(self):
     """Return the parsed ``./md5sums`` of the control archive, cached.

     The file is looked up through 'control.tar.gz' and 'control.tar'.  An
     empty dict is stored and returned when it cannot be found.
     """
     if hasattr(self, '_md5sums'):
         return self._md5sums
     lookup_chain = ('control.tar.gz', 'control.tar', './md5sums')
     md5sums_file = self.as_container.lookup_file(*lookup_chain)
     if not md5sums_file:
         logger.debug('Unable to find a md5sums file')
         self._md5sums = {}
     else:
         self._md5sums = md5sums_file.parse()
     return self._md5sums
Example #26
0
 def get_content(self):
     """Generator making ``self._path`` available around a single ``yield``.

     When a path is already set it is left untouched.  Otherwise the member
     is extracted from the container into a temporary directory before the
     ``yield`` and ``self._path`` is reset to None afterwards.
     """
     logger.debug('%s get_content; path %s', self, self._path)
     if self._path is not None:
         yield
     else:
         with make_temp_directory() as temp_dir, \
              self._container.open() as container:
             self._path = container.extract(self._name, temp_dir)
             try:
                 yield
             finally:
                 # Reset even when an exception is thrown in at the yield,
                 # so that no stale path outlives the ``with`` block.
                 self._path = None
Example #27
0
def get_debug_link(path):
    """Return the first string of the ``.gnu_debuglink`` section of ``path``.

    The section is dumped with ``readelf --string-dump``.  Returns None when
    readelf exits with an error or when its output has no entry at offset 0.
    """
    try:
        output = subprocess.check_output(['readelf', '--string-dump=.gnu_debuglink', path])
    except subprocess.CalledProcessError as e:
        # This message used to mention the Build Id, copied from get_build_id
        logger.debug('Unable to get debug link for %s: %s', path, e)
        return None
    m = re.search(r'^\s+\[\s+0\]\s+(\S+)$', output.decode('utf-8', errors='replace'), flags=re.MULTILINE)
    if not m:
        return None
    return m.group(1)
Example #28
0
 def compare(self, other, source=None):
     """Compare this file with ``other`` by delegating to ``other``.

     Returns a Difference carrying only a comment when ``other`` is a
     NonExistingFile as well, None when ``other`` reports no difference, and
     the reversed result of ``other.compare(self, source)`` otherwise.
     """
     if isinstance(other, NonExistingFile):
         return Difference(None, self.name, other.name, comment='Trying to compare two non-existing files.')
     # The knowledge of the file format lives in ``other``: let it run the
     # comparison backward, then flip the outcome.
     logger.debug('Performing backward comparison')
     backward_diff = other.compare(self, source)
     return backward_diff.get_reverse() if backward_diff else None
Example #29
0
    def validate_checksums(self, check_hash="sha1"):
        """
        Validate checksums for a package, using ``check_hash``'s type
        to validate the package.

        Valid ``check_hash`` types:

            * sha1
            * sha256
            * md5
            * md5sum

        Raises ValueError for any other ``check_hash``, and
        ChangesFileException when a file has no entry in the checksum field
        or when its digest differs from the recorded one.
        """
        logger.debug("validating %s checksums", check_hash)

        # check_hash -> (hash constructor, field listing the entries,
        #                key of the digest inside an entry)
        supported = {
            "sha1": (hashlib.sha1, "Checksums-Sha1", "sha1"),
            "sha256": (hashlib.sha256, "Checksums-Sha256", "sha256"),
            "md5": (hashlib.md5, "Files", "md5sum"),
            "md5sum": (hashlib.md5, "Files", "md5sum"),
        }
        if check_hash not in supported:
            raise ValueError("Unsupported check_hash: %r" % (check_hash,))
        new_hash, checksums_field, field_name = supported[check_hash]
        checksums = self.get(checksums_field)

        for filename in self.get_files():
            basename = os.path.basename(filename)
            for changed_files in checksums:
                if changed_files['name'] == basename:
                    break
            else:
                # A bare ``assert "text"`` is always true, so this case used
                # to go unnoticed; report it instead.
                raise ChangesFileException(
                    "get_files() returns %s which is not listed in %s" % (
                        filename, checksums_field))

            hash_type = new_hash()
            with open(os.path.join(self._directory, filename), "rb") as fc:
                for chunk in iter(lambda: fc.read(131072), b""):
                    hash_type.update(chunk)

            digest = hash_type.hexdigest()
            if digest != changed_files[field_name]:
                raise ChangesFileException(
                    "Checksum mismatch for file %s: %s != %s" % (
                        filename,
                        digest,
                        changed_files[field_name]
                    ))
            logger.debug("%s Checksum for file %s matches",
                field_name, filename)
Example #30
0
 def parse(self):
     """Read ``self.path`` and return a dict mapping './<path>' to its md5sum.

     Each line is split at its first run of whitespace.  An empty dict is
     returned when the file is not valid UTF-8 or a line cannot be split in
     two.
     """
     try:
         entries = {}
         with open(self.path, 'r', encoding='utf-8') as handle:
             for raw_line in handle:
                 fields = re.split(r'\s+', raw_line.strip(), maxsplit=1)
                 digest, member = fields
                 entries['./%s' % member] = digest
     except (UnicodeDecodeError, ValueError):
         logger.debug('Malformed md5sums, ignoring.')
         return {}
     return entries
Example #31
0
    def compare(self, other, source=None):
        """Extend the parent's comparison with the Pstotext command output.

        The extra detail is left out when the required tool is not available
        or when the command reports no difference.
        """
        differences = super().compare(other, source)
        try:
            details = Difference.from_command(Pstotext, self.path, other.path)
        except RequiredToolNotFound:
            logger.debug('ps2ascii not found')
            details = None

        if not details:
            return differences
        differences.add_details([details])
        return differences
Example #32
0
    def has_same_content_as(self, other):
        """Return True when both files hold the same bytes.

        Files of equal size not exceeding SMALL_FILE_THRESHOLD are read and
        compared in memory first; everything else, including a mismatch
        found that way, is decided by the exit status of ``cmp -s``.
        """
        logger.debug('%s has_same_content %s', self, other)
        # try comparing small files directly first
        my_size = os.path.getsize(self.path)
        other_size = os.path.getsize(other.path)
        if my_size == other_size and my_size <= SMALL_FILE_THRESHOLD:
            # ``with`` closes both files right away instead of leaving that
            # to the garbage collector
            with open(self.path, 'rb') as mine, \
                 open(other.path, 'rb') as theirs:
                if mine.read() == theirs.read():
                    return True

        return 0 == subprocess.call(['cmp', '-s', self.path, other.path],
                                    shell=False, close_fds=True)
Example #33
0
 def parse(self):
     """Read ``self.path`` and return a dict mapping './<path>' to its md5sum.

     Each line is split at its first run of whitespace.  An empty dict is
     returned when the file is not valid UTF-8 or a line cannot be split in
     two.
     """
     try:
         by_path = {}
         with open(self.path, 'r', encoding='utf-8') as listing:
             for text in listing:
                 checksum, member_path = re.split(r'\s+', text.strip(), maxsplit=1)
                 by_path['./%s' % member_path] = checksum
     except (UnicodeDecodeError, ValueError):
         logger.debug('Malformed md5sums, ignoring.')
         return {}
     else:
         return by_path
Example #34
0
def get_build_id(path):
    """Return the Build ID that ``readelf --notes`` prints for ``path``.

    Returns None when readelf exits with an error or when its output has no
    Build ID line.
    """
    try:
        output = subprocess.check_output(['readelf', '--notes', path])
    except subprocess.CalledProcessError as e:
        logger.debug('Unable to get Build Id for %s: %s', path, e)
        return None
    # Bytes that are not valid UTF-8 elsewhere in the output must not abort
    # the lookup; the Build ID itself only consists of hexadecimal digits.
    m = re.search(r'^\s+Build ID: ([0-9a-f]+)$',
                  output.decode('utf-8', errors='replace'),
                  flags=re.MULTILINE)
    if not m:
        return None
    return m.group(1)
Example #35
0
 def as_container(self):
     """Return a container object wrapping this file, or None.

     Classes defining CONTAINER_CLASS get one cached instance of it.  Other
     classes borrow the CONTAINER_CLASS of ``self._other_file`` when that
     attribute exists (a new instance per call) and yield None otherwise.
     """
     own_class = self.__class__
     if hasattr(own_class, 'CONTAINER_CLASS'):
         if not hasattr(self, '_as_container'):
             logger.debug('instanciating %s for %s',
                          own_class.CONTAINER_CLASS, self)
             self._as_container = own_class.CONTAINER_CLASS(self)
         logger.debug('returning a %s for %s', self._as_container.__class__,
                      self)
         return self._as_container
     if hasattr(self, '_other_file'):
         return self._other_file.__class__.CONTAINER_CLASS(self)
     return None
Example #36
0
def output_html(difference, css_url=None, print_func=None):
    """Write ``difference`` as one HTML document through ``print_func``.

    ``print`` is used when no ``print_func`` is given.  The printer is
    wrapped by ``create_limited_print_func`` with
    Config.general.max_report_size; when PrintLimitReached is raised an
    error block is written instead.  The footer is written in every case.
    """
    emit = create_limited_print_func(
        print if print_func is None else print_func,
        Config.general.max_report_size)
    try:
        output_header(css_url, emit)
        output_difference(difference, emit, [])
    except PrintLimitReached:
        logger.debug('print limit reached')
        emit(u"<div class='error'>Max output size reached.</div>",
             force=True)
    footer = FOOTER % {'version': VERSION}
    emit(footer, force=True)
Example #37
0
 def extract(self, member_name, dest_dir):
     """Extract ``member_name`` from the CBFS image with cbfstool.

     The file is written to ``dest_dir`` under the base name of
     ``member_name``; that path is returned.  subprocess.CalledProcessError
     is raised when cbfstool exits with a non-zero status.
     """
     dest_path = os.path.join(dest_dir, os.path.basename(member_name))
     cmd = [
         'cbfstool', self.source.path, 'extract', '-n', member_name, '-f',
         dest_path
     ]
     logger.debug("cbfstool extract %s to %s", member_name, dest_path)
     # The output is never read, so discard it like stderr instead of piping
     # it: an unread PIPE blocks the child once the OS pipe buffer is full.
     subprocess.check_call(cmd,
                           shell=False,
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL)
     return dest_path
Example #38
0
 def extract(self, member_name, dest_dir):
     """Write the archive entry named ``member_name`` below ``dest_dir``.

     The entry is stored under the base name of ``member_name`` and that
     path is returned.  KeyError is raised when the archive has no entry
     with this path name.
     """
     dest_path = os.path.join(dest_dir, os.path.basename(member_name))
     logger.debug('libarchive extracting %s to %s', member_name, dest_path)
     with libarchive.file_reader(self.source.path) as archive:
         for entry in archive:
             if entry.pathname == member_name:
                 logger.debug('entry found, writing %s', dest_path)
                 with open(dest_path, 'wb') as f:
                     for buf in entry.get_blocks():
                         f.write(buf)
                 return dest_path
     # Exceptions do not interpolate their arguments the way logging calls
     # do, so the message has to be formatted explicitly.
     raise KeyError('%s not found in archive' % member_name)
Example #39
0
    def has_same_content_as(self, other):
        """Return True when both files hold the same bytes.

        Files of equal size not exceeding SMALL_FILE_THRESHOLD are read and
        compared in memory first; everything else, including a mismatch
        found that way, is decided by the exit status of ``cmp -s``.
        """
        logger.debug('%s has_same_content %s', self, other)
        # try comparing small files directly first
        my_size = os.path.getsize(self.path)
        other_size = os.path.getsize(other.path)
        if my_size == other_size and my_size <= SMALL_FILE_THRESHOLD:
            # ``with`` closes both files right away instead of leaving that
            # to the garbage collector
            with open(self.path, 'rb') as mine, \
                 open(other.path, 'rb') as theirs:
                if mine.read() == theirs.read():
                    return True

        return 0 == subprocess.call(['cmp', '-s', self.path, other.path],
                                    shell=False,
                                    close_fds=True)
Example #40
0
    def validate_checksums(self, check_hash="sha1"):
        """
        Validate checksums for a package, using ``check_hash``'s type
        to validate the package.

        Valid ``check_hash`` types:

            * sha1
            * sha256
            * md5
            * md5sum

        Raises ValueError for any other ``check_hash``, and
        ChangesFileException when a file has no entry in the checksum field
        or when its digest differs from the recorded one.
        """
        logger.debug("validating %s checksums", check_hash)

        # check_hash -> (hash constructor, field listing the entries,
        #                key of the digest inside an entry)
        supported = {
            "sha1": (hashlib.sha1, "Checksums-Sha1", "sha1"),
            "sha256": (hashlib.sha256, "Checksums-Sha256", "sha256"),
            "md5": (hashlib.md5, "Files", "md5sum"),
            "md5sum": (hashlib.md5, "Files", "md5sum"),
        }
        if check_hash not in supported:
            raise ValueError("Unsupported check_hash: %r" % (check_hash,))
        new_hash, checksums_field, field_name = supported[check_hash]
        checksums = self.get(checksums_field)

        for filename in self.get_files():
            basename = os.path.basename(filename)
            for changed_files in checksums:
                if changed_files['name'] == basename:
                    break
            else:
                # A bare ``assert "text"`` is always true, so this case used
                # to go unnoticed; report it instead.
                raise ChangesFileException(
                    "get_files() returns %s which is not listed in %s" %
                    (filename, checksums_field))

            hash_type = new_hash()
            with open(os.path.join(self._directory, filename), "rb") as fc:
                for chunk in iter(lambda: fc.read(131072), b""):
                    hash_type.update(chunk)

            digest = hash_type.hexdigest()
            if digest != changed_files[field_name]:
                raise ChangesFileException(
                    "Checksum mismatch for file %s: %s != %s" %
                    (filename, digest, changed_files[field_name]))
            logger.debug("%s Checksum for file %s matches", field_name,
                         filename)
Example #41
0
 def get_reverse(self):
     """Return a new Difference with both sources swapped.

     The unified diff, when there is one, goes through
     ``reverse_unified_diff``; comments are reused and every detail is
     reversed recursively.
     """
     if self._unified_diff is not None:
         mirrored_diff = reverse_unified_diff(self._unified_diff)
     else:
         mirrored_diff = None
     logger.debug('reverse orig %s %s', self._source1, self._source2)
     mirrored = Difference(mirrored_diff,
                           None,
                           None,
                           source=[self._source2, self._source1],
                           comment=self._comments)
     reversed_details = [detail.get_reverse() for detail in self._details]
     mirrored.add_details(reversed_details)
     return mirrored
Example #42
0
def get_debug_link(path):
    """Return the first string of the ``.gnu_debuglink`` section of ``path``.

    The section is dumped with ``readelf --string-dump``.  Returns None when
    readelf exits with an error or when its output has no entry at offset 0.
    """
    try:
        output = subprocess.check_output(
            ['readelf', '--string-dump=.gnu_debuglink', path])
    except subprocess.CalledProcessError as e:
        # This message used to mention the Build Id, copied from get_build_id
        logger.debug('Unable to get debug link for %s: %s', path, e)
        return None
    m = re.search(r'^\s+\[\s+0\]\s+(\S+)$',
                  output.decode('utf-8', errors='replace'),
                  flags=re.MULTILINE)
    if not m:
        return None
    return m.group(1)
Example #43
0
def output_html_directory(directory,
                          difference,
                          css_url=None,
                          jquery_url=None):
    """
    Multi-file presenter. Writes to a directory, and puts large diff tables
    into files of their own.

    This uses jQuery. By default the first existing entry of
    JQUERY_SYSTEM_LOCATIONS is symlinked into the directory as jquery.js
    (symlinked, so that you can still share the result over HTTP).
    You can also pass --jquery URL to diffoscope to use a central jQuery copy.
    Passing 'disable' as ``jquery_url`` leaves the scripts out altogether.
    """
    # exist_ok avoids failing when the directory appears between a separate
    # existence check and its creation
    os.makedirs(directory, exist_ok=True)

    if not jquery_url:
        jquery_symlink = os.path.join(directory, "jquery.js")
        if os.path.exists(jquery_symlink):
            jquery_url = "./jquery.js"
        else:
            # exists() was False but lexists() is True: a dangling symlink
            if os.path.lexists(jquery_symlink):
                os.unlink(jquery_symlink)
            for path in JQUERY_SYSTEM_LOCATIONS:
                if os.path.exists(path):
                    # Link to the location that was actually found rather
                    # than to one hard-coded path.
                    os.symlink(path, jquery_symlink)
                    jquery_url = "./jquery.js"
                    break
            if not jquery_url:
                logger.warning(
                    '--jquery was not specified and jQuery was not found in any known location. Disabling on-demand inline loading.'
                )
                logger.debug('Locations searched: %s',
                             ', '.join(JQUERY_SYSTEM_LOCATIONS))
    if jquery_url == 'disable':
        jquery_url = None

    with file_printer(directory, "index.html") as print_func:
        print_func = create_limited_print_func(print_func,
                                               Config.general.max_report_size)
        try:
            output_header(css_url, print_func)
            output_difference(difference, print_func, css_url, directory, [])
        except PrintLimitReached:
            logger.debug('print limit reached')
            print_func(u"<div class='error'>Max output size reached.</div>",
                       force=True)
        if jquery_url:
            print_func(SCRIPTS % {'jquery_url': escape(jquery_url)},
                       force=True)
        output_footer(print_func)
Example #44
0
def compare_files(file1, file2, source=None):
    """Compare two files and return the outcome, or None when they are equal.

    Both files go through ``specialize`` first.  When one of them is a
    NonExistingFile it is given a reference to the other one.  Files whose
    class names differ are compared with ``compare_bytes``, all others with
    ``compare``.
    """
    logger.debug('compare files %s and %s', file1, file2)
    if file1.has_same_content_as(file2):
        logger.debug('same content, skipping')
        return None
    for candidate in (file1, file2):
        specialize(candidate)
    if isinstance(file1, NonExistingFile):
        file1.other_file = file2
        return file1.compare(file2, source)
    if isinstance(file2, NonExistingFile):
        file2.other_file = file1
        return file1.compare(file2, source)
    if file1.__class__.__name__ == file2.__class__.__name__:
        return file1.compare(file2, source)
    return file1.compare_bytes(file2, source)
Example #45
0
def compare_files(file1, file2, source=None):
    """Return the difference between ``file1`` and ``file2``, or ``None``.

    ``None`` is returned up front when ``file1.has_same_content_as(file2)``
    reports identical content. Otherwise both objects are handed to
    ``specialize`` and then compared; ``source`` is passed straight through
    to the comparison method that ends up being called.
    """
    logger.debug('compare files %s and %s', file1, file2)
    if file1.has_same_content_as(file2):
        logger.debug('same content, skipping')
        return None
    specialize(file1)
    specialize(file2)
    # A missing side is told about its counterpart before comparing;
    # file1 is checked first and only one side is ever updated.
    if isinstance(file1, NonExistingFile):
        file1.other_file = file2
        return file1.compare(file2, source)
    if isinstance(file2, NonExistingFile):
        file2.other_file = file1
        return file1.compare(file2, source)
    # Same class name -> class-specific comparison, otherwise raw bytes.
    if file1.__class__.__name__ == file2.__class__.__name__:
        return file1.compare(file2, source)
    return file1.compare_bytes(file2, source)
Example #46
0
 def comparisons(self, other):
     """Yield the parent's comparison triples, minus md5sum-identical members.

     The md5sums mappings are read from
     ``source.container.source.container.source.md5sums`` on each side; an
     empty mapping is used when the respective ``source`` is falsy. A pair
     is dropped (and logged) when both members share a name and the two
     mappings return equal values for it.
     """
     own_sums = (self.source.container.source.container.source.md5sums
                 if self.source else {})
     their_sums = (other.source.container.source.container.source.md5sums
                   if other.source else {})
     for mine, theirs, comment in super().comparisons(other):
         # The distinct fallbacks ('my' / 'other') ensure that members
         # missing from both mappings never count as identical.
         identical = (mine.name == theirs.name and
                      own_sums.get(mine.name, 'my') == their_sums.get(theirs.name, 'other'))
         if identical:
             logger.debug('Skip %s: identical md5sum', mine.name)
         else:
             yield mine, theirs, comment
Example #47
0
def output_html(difference, css_url=None, print_func=None):
    """
    Default presenter, all in one HTML file.

    Output goes through ``print_func`` (the builtin ``print`` when it is
    ``None``), wrapped by ``create_limited_print_func`` with
    ``Config.general.max_report_size``. If ``PrintLimitReached`` is raised
    while writing the header or the difference, an error ``<div>`` is
    emitted with ``force=True``; the footer is written afterwards either way.
    """
    sink = print if print_func is None else print_func
    emit = create_limited_print_func(sink, Config.general.max_report_size)
    try:
        output_header(css_url, emit)
        output_difference(difference, emit, css_url, None, [])
    except PrintLimitReached:
        logger.debug('print limit reached')
        # NOTE(review): force=True presumably bypasses the size limit so the
        # notice is always written -- confirm in create_limited_print_func.
        emit(u"<div class='error'>Max output size reached.</div>", force=True)
    output_footer(emit)
Example #48
0
 def lookup_file(self, *names):
     """Try to fetch a specific file by digging in containers.

     ``names`` is a path of member names: the first one is looked up with
     ``self.get_member`` and the remaining ones are resolved recursively
     through the ``as_container`` of the member found. Returns ``None``
     when ``get_member`` raises ``KeyError`` or when an intermediate member
     has a falsy ``as_container``.
     """
     first = names[0]
     rest = names[1:]
     try:
         member = self.get_member(first)
     except KeyError:
         return None
     logger.debug('lookup_file(%s) -> %s', names, member)
     diffoscope.comparators.specialize(member)
     if rest:
         # More names to resolve: descend into the member's container.
         nested = member.as_container
         return nested.lookup_file(*rest) if nested else None
     return member
Example #49
0
 def compare(self, other, source=None):
     """Compare by delegating to ``other`` and reversing the result.

     When ``other`` is a ``NonExistingFile`` a ``Difference`` carrying only
     an explanatory comment is returned. Otherwise ``other.compare(self,
     source)`` is run and its result is flipped with ``get_reverse()``;
     ``None`` is returned when that backward comparison is falsy.
     """
     # This object has no format knowledge of its own, so the comparison is
     # run from the other side (where that knowledge lives) and then
     # reversed.
     if not isinstance(other, NonExistingFile):
         logger.debug('Performing backward comparison')
         reversed_input = other.compare(self, source)
         return reversed_input.get_reverse() if reversed_input else None
     return Difference(
         None,
         self.name,
         other.name,
         comment='Trying to compare two non-existing files.')
Example #50
0
 def lookup_file(self, *names):
     """Try to fetch a specific file by digging in containers.

     The first entry of ``names`` is resolved via ``self.get_member``; any
     further entries are resolved by calling ``lookup_file`` on the
     ``as_container`` of the member that was found. ``None`` is returned
     if ``get_member`` raises ``KeyError`` or ``as_container`` is falsy.
     """
     head, tail = names[0], names[1:]
     try:
         found = self.get_member(head)
     except KeyError:
         return None
     logger.debug('lookup_file(%s) -> %s', names, found)
     diffoscope.comparators.specialize(found)
     if len(tail) == 0:
         # Last name of the path: this is the requested file.
         return found
     inner = found.as_container
     if inner:
         return inner.lookup_file(*tail)
     return None
Example #51
0
 def comparisons(self, other):
     """Filter the parent's comparisons using the md5sums of both sides.

     Each side's mapping comes from
     ``source.container.source.container.source.md5sums`` and defaults to
     an empty dict when ``source`` is falsy. Pairs with the same member
     name and equal looked-up values are logged and skipped; every other
     ``(my_member, other_member, comment)`` triple is yielded unchanged.
     """
     left_sums = {}
     if self.source:
         left_sums = self.source.container.source.container.source.md5sums
     right_sums = {}
     if other.source:
         right_sums = other.source.container.source.container.source.md5sums
     for left, right, comment in super().comparisons(other):
         if left.name == right.name:
             # Different fallback values keep two unknown members from
             # comparing equal.
             left_sum = left_sums.get(left.name, 'my')
             right_sum = right_sums.get(right.name, 'other')
             if left_sum == right_sum:
                 logger.debug('Skip %s: identical md5sum', left.name)
                 continue
         yield left, right, comment
Example #52
0
 def extract(self, member_name, dest_dir):
     """Extract one archive member into ``dest_dir`` and return its path.

     The archive at ``self.source.path`` is scanned with
     ``libarchive.file_reader``; the first entry whose ``pathname`` equals
     ``member_name`` is written, block by block, to
     ``dest_dir/<basename of member_name>``.

     Raises:
         ValueError: if ``member_name`` has an empty basename (e.g. it ends
             with a path separator).
         KeyError: if no entry in the archive matches ``member_name``.
     """
     dest_name = os.path.basename(member_name)
     if not dest_name:
         raise ValueError('member_name should not be a directory')
     dest_path = os.path.join(dest_dir, dest_name)
     logger.debug('libarchive extracting %s to %s', member_name, dest_path)
     with libarchive.file_reader(self.source.path) as archive:
         for entry in archive:
             if entry.pathname == member_name:
                 logger.debug('entry found, writing %s', dest_path)
                 with open(dest_path, 'wb') as f:
                     for buf in entry.get_blocks():
                         f.write(buf)
                 return dest_path
     # Exceptions do not interpolate extra arguments the way logging calls
     # do, so the message has to be formatted explicitly.
     raise KeyError('%s not found in archive' % member_name)
Example #53
0
def output_unified_diff(print_func, css_url, directory, unified_diff):
    """Emit a unified diff inline, or in its own HTML file when it is large.

    The diff table is written directly through ``print_func`` unless
    ``directory`` is truthy and ``len(unified_diff)`` exceeds
    ``Config.general.separate_file_diff_size``. In that case the table goes
    into ``<md5 of the diff>.html`` inside ``directory`` (with its own
    header and footer) and only an "ondemand" link is printed inline.
    """
    if not directory or len(unified_diff) <= Config.general.separate_file_diff_size:
        output_unified_diff_table(print_func, unified_diff)
        return

    # open a new file for this table; the name is derived from the content
    digest = hashlib.md5(unified_diff.encode('utf-8')).hexdigest()
    filename = "%s.html" % digest
    logger.debug('separate html output for diff of size %d', len(unified_diff))
    with file_printer(directory, filename) as new_print_func:
        output_header(css_url, new_print_func)
        output_unified_diff_table(new_print_func, unified_diff)
        output_footer(new_print_func)

    print_func("<div class='ondemand'>\n")
    link = "... <a href='%s'>load diff</a> ...\n" % escape(filename)
    print_func(link)
    print_func("</div>\n")
Example #54
0
    def has_same_content_as(self, other):
        """Return True when ``self.path`` and ``other.path`` hold equal bytes.

        Files of equal size no larger than ``SMALL_FILE_THRESHOLD`` are
        first compared in memory; anything not proven equal that way is
        handed to the external ``cmp -s`` command, whose zero exit status
        means "identical". If either size cannot be obtained (``OSError``),
        the files are assumed to differ and ``False`` is returned.
        """
        logger.debug('%s has_same_content %s', self, other)
        # try comparing small files directly first
        try:
            my_size = os.path.getsize(self.path)
            other_size = os.path.getsize(other.path)
        except OSError:
            # files not readable (e.g. broken symlinks) or something else,
            # just assume they are different
            return False
        if my_size == other_size and my_size <= SMALL_FILE_THRESHOLD:
            # Context managers close both descriptors deterministically
            # instead of leaving that to the garbage collector.
            with open(self.path, 'rb') as mine, \
                    open(other.path, 'rb') as theirs:
                if mine.read() == theirs.read():
                    return True

        return 0 == subprocess.call(['cmp', '-s', self.path, other.path],
                                    shell=False, close_fds=True)
Example #55
0
def output_html(difference, css_url=None, print_func=None):
    """
    Default presenter, all in one HTML file.

    ``print_func`` defaults to the builtin ``print`` and is wrapped by
    ``create_limited_print_func`` using ``Config.general.max_report_size``.
    When writing the header or the difference raises ``PrintLimitReached``,
    an error ``<div>`` is printed with ``force=True``. The footer is always
    written last.
    """
    if print_func is None:
        print_func = print
    max_size = Config.general.max_report_size
    limited_print = create_limited_print_func(print_func, max_size)
    try:
        output_header(css_url, limited_print)
        output_difference(difference, limited_print, css_url, None, [])
    except PrintLimitReached:
        logger.debug('print limit reached')
        limited_print(
            u"<div class='error'>Max output size reached.</div>",
            force=True)
    output_footer(limited_print)
Example #56
0
 def filter(self, line):
     """Re-encode one line of output (bytes) to UTF-8.

     While ``self._encoding`` is unset, lines are buffered in
     ``self._header`` and ``b''`` is returned, until either a line matching
     ``Msgunfmt.CHARSET_RE`` supplies the encoding or a bare newline is
     seen (in which case UTF-8 is assumed). At that point the whole
     buffered header is returned. Once the encoding is known, each line is
     passed through as-is for UTF-8 or transcoded to UTF-8 otherwise.
     """
     if self._encoding:
         if self._encoding == 'utf-8':
             return line
         return line.decode(self._encoding).encode('utf-8')

     # Encoding still unknown: keep accumulating the header.
     self._header.write(line)
     if line == b'\n':
         logger.debug("unable to determine PO encoding, let's hope it's utf-8")
         self._encoding = 'utf-8'
         return self._header.getvalue()
     charset = Msgunfmt.CHARSET_RE.match(line)
     if not charset:
         return b''
     self._encoding = charset.group(1).decode('us-ascii').lower()
     buffered = self._header.getvalue()
     return buffered.decode(self._encoding).encode('utf-8')
Example #57
0
    def has_same_content_as(self, other):
        """Tell whether ``self.path`` and ``other.path`` have equal content.

        Equal-sized files up to ``SMALL_FILE_THRESHOLD`` bytes are compared
        in memory first. If that does not establish equality, the decision
        is delegated to ``cmp -s`` (exit status 0 means identical). An
        ``OSError`` while reading the file sizes results in ``False``.
        """
        logger.debug('%s has_same_content %s', self, other)
        # try comparing small files directly first
        try:
            my_size = os.path.getsize(self.path)
            other_size = os.path.getsize(other.path)
        except OSError:
            # files not readable (e.g. broken symlinks) or something else,
            # just assume they are different
            return False
        if my_size == other_size and my_size <= SMALL_FILE_THRESHOLD:
            # Open both files under ``with`` so the handles are released
            # right away rather than leaked until garbage collection.
            with open(self.path, 'rb') as my_file, \
                    open(other.path, 'rb') as other_file:
                if my_file.read() == other_file.read():
                    return True

        return 0 == subprocess.call(['cmp', '-s', self.path, other.path],
                                    shell=False,
                                    close_fds=True)
Example #58
0
 def filter(self, line):
     """Return ``line`` (bytes) converted to UTF-8.

     Until an encoding is known, every line is appended to
     ``self._header`` and an empty bytes object is returned. The encoding
     is taken from the first line matching ``Msgunfmt.CHARSET_RE``; a bare
     newline seen before that makes the method fall back to UTF-8. In both
     cases the accumulated header is returned in one go. Later lines are
     returned unchanged for UTF-8 and transcoded for any other encoding.
     """
     encoding = self._encoding
     if encoding:
         return (line if encoding == 'utf-8'
                 else line.decode(encoding).encode('utf-8'))

     self._header.write(line)
     if line == b'\n':
         # End of header reached without a charset declaration.
         logger.debug(
             "unable to determine PO encoding, let's hope it's utf-8")
         self._encoding = 'utf-8'
         return self._header.getvalue()
     match = Msgunfmt.CHARSET_RE.match(line)
     if match:
         self._encoding = match.group(1).decode('us-ascii').lower()
         header_bytes = self._header.getvalue()
         return header_bytes.decode(self._encoding).encode('utf-8')
     return b''
Example #59
0
 def compare(self, other, source=None):
     """Compare two md5sums files and record which listed files are equal.

     Returns ``None`` when ``other.path`` is ``None``. Otherwise both files
     are parsed with ``Md5sumsFile.parse_md5sums``; every path present on
     both sides with an equal checksum is collected (prefixed with
     ``./``) and passed to ``set_files_with_same_content_in_data`` on
     ``self.container.source.container.source.container.source``. A
     ``Difference`` with a fixed comment is then returned.

     If a ``ValueError`` is raised along the way, the result of
     ``self.compare_bytes(other)`` is returned instead, with a comment
     describing the malformed file.
     """
     if other.path is None:
         return None
     try:
         mine = Md5sumsFile.parse_md5sums(self.path)
         theirs = Md5sumsFile.parse_md5sums(other.path)
         same = {
             './%s' % path
             for path in mine.keys() & theirs.keys()
             if mine[path] == theirs[path]
         }
         data_source = self.container.source.container.source.container.source
         data_source.set_files_with_same_content_in_data(same)
         logger.debug('Identifed %d files as identical in data archive', len(same))
         return Difference(None, self.path, other.path, source='md5sums',
                           comment="Files in package differs")
     except ValueError as e:
         fallback = self.compare_bytes(other)
         fallback.add_comment('Malformed md5sums file: %s' % e)
         return fallback
Example #60
0
def output_unified_diff(print_func, css_url, directory, unified_diff):
    """Print a unified diff table, moving large ones to a separate file.

    When ``directory`` is truthy and the diff is longer than
    ``Config.general.separate_file_diff_size``, the table is written to a
    standalone HTML file in ``directory`` (named after the MD5 hex digest
    of the diff) and a "load diff" link is printed through ``print_func``.
    In every other case the table is printed through ``print_func``
    directly.
    """
    use_separate_file = (
        directory
        and len(unified_diff) > Config.general.separate_file_diff_size)
    if not use_separate_file:
        output_unified_diff_table(print_func, unified_diff)
        return

    # open a new file for this table
    content_hash = hashlib.md5(unified_diff.encode('utf-8'))
    filename = "%s.html" % content_hash.hexdigest()
    logger.debug('separate html output for diff of size %d',
                 len(unified_diff))
    with file_printer(directory, filename) as new_print_func:
        output_header(css_url, new_print_func)
        output_unified_diff_table(new_print_func, unified_diff)
        output_footer(new_print_func)

    # Inline placeholder pointing at the separately written table.
    print_func("<div class='ondemand'>\n")
    print_func("... <a href='%s'>load diff</a> ...\n" % escape(filename))
    print_func("</div>\n")