def compare_files(file1, file2, source=None, diff_content_only=False):
    """Compare two file objects and return the resulting difference.

    Returns ``None`` when either name is excluded or when
    ``file1.has_same_content_as(file2)`` reports identical content.  With
    ``diff_content_only`` set, differing files yield a bare ``Difference``
    carrying only the comment "Files differ".  Otherwise both files are
    specialized and compared in detail; ``source`` is forwarded unchanged to
    ``compare_bytes`` / ``compare``.
    """
    logger.debug(
        "Comparing %s (%s) and %s (%s)",
        file1.name,
        file1.__class__.__name__,
        file2.name,
        file2.__class__.__name__,
    )

    if any_excluded(file1.name, file2.name):
        return None

    with profile('has_same_content_as', file1):
        identical = file1.has_same_content_as(file2)
    if identical:
        logger.debug(
            "has_same_content_as returned True; skipping further comparisons"
        )
        return None

    if diff_content_only:
        # Caller only wants to know *whether* the files differ.
        shallow = Difference(None, file1.name, file2.name)
        shallow.add_comment("Files differ")
        return shallow

    for candidate in (file1, file2):
        specialize(candidate)

    if isinstance(file1, MissingFile):
        file1.other_file = file2
    elif isinstance(file2, MissingFile):
        file2.other_file = file1
    else:
        # Files that specialized to differently named classes are compared
        # as raw bytes.
        if file1.__class__.__name__ != file2.__class__.__name__:
            return file1.compare_bytes(file2, source)

    with profile('compare_files (cumulative)', file1):
        return file1.compare(file2, source)
def recognizes(cls, file):
    """Return True when *file* is a PPU file of the version ``ppudump`` handles.

    The file must pass the parent class check and start with the ``PPU``
    magic; the three bytes that follow are taken as its version.  On first
    use ``ppudump -vh`` is run once and the outcome is cached on
    ``PpuFile.ppu_version``:

    * success: the version of this file is stored,
    * failure with an "Expecting PPU version N" message: ``N`` is stored,
    * any other failure (or ``OSError`` launching the tool): ``None``.
    """
    if not super().recognizes(file):
        return False

    with open(file.path, 'rb') as f:
        if f.read(3) != b"PPU":
            return False
        ppu_version = f.read(3).decode('ascii', errors='ignore')

    if not hasattr(PpuFile, 'ppu_version'):
        try:
            with profile('command', 'ppudump'):
                subprocess.check_output(
                    ['ppudump', '-vh', file.path],
                    shell=False,
                    stderr=subprocess.STDOUT,
                )
            PpuFile.ppu_version = ppu_version
        except subprocess.CalledProcessError as e:
            error = e.output.decode('utf-8', errors='ignore')
            m = re.search('Expecting PPU version ([0-9]+)', error)
            if m is None:
                PpuFile.ppu_version = None
                logger.debug('Unable to read PPU version')
            else:
                PpuFile.ppu_version = m.group(1)
        except OSError:
            PpuFile.ppu_version = None
            logger.debug('Unable to read PPU version')

    return PpuFile.ppu_version == ppu_version
def specialize(file):
    """Re-class *file* in place as the first matching comparator class.

    Each class from ``ComparatorManager().classes`` is tried in order.  A
    class with a ``recognizes`` attribute decides for itself; otherwise its
    ``RE_FILE_TYPE`` / ``RE_FILE_EXTENSION`` patterns (whichever are set) must
    all match the file's magic type / name.  On a match the file's
    ``__class__`` becomes a new type deriving from both the comparator class
    and the file's current type.  The same *file* object is always returned.
    """
    for cls in ComparatorManager().classes:
        if isinstance(file, cls):
            return file

        if hasattr(cls, 'recognizes'):
            with profile('recognizes', file):
                matched = cls.recognizes(file)
        else:
            candidates = (
                (cls.RE_FILE_TYPE, file.magic_file_type),
                (cls.RE_FILE_EXTENSION, file.name),
            )
            checks = [
                (pattern, text) for pattern, text in candidates if pattern
            ]
            # With no pattern defined at all, the class is *not* a match.
            matched = bool(checks) and all(
                pattern.search(text) for pattern, text in checks
            )

        if matched:
            # Found a match; perform type magic
            logger.debug("Using %s for %s", cls.__name__, file.name)
            file.__class__ = type(cls.__name__, (cls, type(file)), {})
            return file

    logger.debug("Unidentified file. Magic says: %s", file.magic_file_type)
    return file
def calculate(args):
    """Parse *args*, set up progress/logging and return ``run_diffoscope``'s result."""
    with profile("main", "parse_args"):
        arg_parser, finalize = create_parser()
        options = arg_parser.parse_args(args)

    # NOTE(review): the parsed arguments are written to stdout here; this
    # looks like leftover debugging output -- confirm whether it is wanted.
    print(options)

    handler = ProgressManager().setup(options)
    with setup_logging(options.debug, handler) as logger:
        finalize(options)
        return run_diffoscope(options)
def path(self):
    """Return the extracted path of this member, extracting it on first use.

    The first call creates a temporary directory, asks the container to
    extract ``self._name`` into it and caches the result in ``self._path``;
    later calls return the cached value.
    """
    if self._path is not None:
        return self._path

    logger.debug(
        "Unpacking %s from %s", self._name, self.container.source.name
    )
    # Nothing has been extracted yet, so no temporary directory may exist.
    assert self._temp_dir is None
    self._temp_dir = get_temporary_directory()
    with profile('container_extract', self.container):
        self._path = self.container.extract(self._name, self._temp_dir.name)
    return self._path
def compare_files(file1, file2, source=None, diff_content_only=False):
    """Compare two file objects and return the resulting difference.

    Returns ``None`` when either name is excluded.  When the contents are
    reported identical, ``None`` is returned unless ``Config().force_details``
    is set (and ``diff_content_only`` is not).  With ``diff_content_only``
    set, differing files yield a ``Difference`` carrying only the comment
    "Files differ".  Otherwise both files are specialized and compared in
    detail; ``source`` is forwarded to ``compare_bytes`` / ``compare``.
    """
    logger.debug(
        "Comparing %s (%s) and %s (%s)",
        file1.name,
        file1.__class__.__name__,
        file2.name,
        file2.__class__.__name__,
    )

    if any_excluded(file1.name, file2.name):
        return None

    force_details = Config().force_details
    with profile('has_same_content_as', file1):
        identical = file1.has_same_content_as(file2)

    if identical and not force_details:
        logger.debug(
            "has_same_content_as returned True; skipping further comparisons"
        )
        return None

    if diff_content_only:
        # Content-only mode never descends into a detailed comparison.
        if identical:
            return None
        return Difference(None, file1.name, file2.name, comment="Files differ")

    for candidate in (file1, file2):
        specialize(candidate)

    if isinstance(file1, MissingFile):
        file1.other_file = file2
    elif isinstance(file2, MissingFile):
        file2.other_file = file1
    else:
        types_differ = file1.__class__.__name__ != file2.__class__.__name__
        # Differently typed files fall back to a byte comparison only when
        # at least one of them has no container view.
        if types_differ and (
            file1.as_container is None or file2.as_container is None
        ):
            return file1.compare_bytes(file2, source)

    with profile('compare_files (cumulative)', file1):
        return file1.compare(file2, source)
def try_recognize(file, cls, recognizes):
    """Try to re-class *file* as *cls*; return whether it now is a *cls*.

    Returns True immediately if *file* is already an instance of *cls*.
    Otherwise the ``recognizes`` callable decides: on a falsy answer the file
    is left untouched and False is returned; on a truthy answer the file's
    ``__class__`` becomes a new type deriving from *cls* and its current type.
    """
    if isinstance(file, cls):
        return True

    # Does this file class match?
    with profile('recognizes', file):
        matched = recognizes(file)
    if not matched:
        return False

    # Found a match; perform type magic
    logger.debug("Using %s for %s", cls.__name__, file.name)
    file.__class__ = type(cls.__name__, (cls, type(file)), {})
    return True
def specialize(file):
    """Re-class *file* in place as the first comparator class recognizing it.

    Classes from ``ComparatorManager().classes`` are tried in order.  If the
    file already is an instance of one of them it is returned as is.  On the
    first class whose ``recognizes(file)`` is truthy, the file's ``__class__``
    becomes a new type deriving from that class and the file's current type.
    The same *file* object is always returned.
    """
    for cls in ComparatorManager().classes:
        if isinstance(file, cls):
            return file

        # Does this file class match?
        with profile('recognizes', file):
            recognized = cls.recognizes(file)
        if not recognized:
            continue

        # Found a match; perform type magic
        logger.debug("Using %s for %s", cls.__name__, file.name)
        file.__class__ = type(cls.__name__, (cls, type(file)), {})
        return file

    logger.debug("Unidentified file. Magic says: %s", file.magic_file_type)
    return file
def has_same_content_as(self, other):
    """Return whether this file and *other* have identical content.

    Directories are never considered identical, and files whose size cannot
    be read are treated as different.  Equal-sized files no larger than
    ``SMALL_FILE_THRESHOLD`` are read and compared in-process; everything
    else is delegated to ``self.cmp_external(other)``.
    """
    logger.debug('Binary.has_same_content: %s %s', self, other)

    if os.path.isdir(self.path):
        return False
    if os.path.isdir(other.path):
        return False

    try:
        my_size = os.path.getsize(self.path)
        other_size = os.path.getsize(other.path)
    except OSError:
        # files not readable (e.g. broken symlinks) or something else,
        # just assume they are different
        return False

    if my_size != other_size or my_size > SMALL_FILE_THRESHOLD:
        return self.cmp_external(other)

    # Small files of equal size: compare them directly.
    try:
        with profile('command', 'cmp (internal)'):
            with open(self.path, 'rb') as mine, open(other.path, 'rb') as theirs:
                return mine.read() == theirs.read()
    except OSError:
        # one or both files could not be opened for some reason,
        # assume they are different
        return False
def recognizes(file):
    """Return True when *file* is a Haskell interface file for the local GHC.

    The file name must match ``HiFile.RE_FILE_EXTENSION``.  On first use the
    output of ``ghc --numeric-version`` is turned into a version string
    (``'%d%02d%d'`` of major, minor, patch) and cached on
    ``HiFile.hi_version``; ``None`` is cached when GHC cannot be run or its
    output is not three dot-separated integers.  The file must then start
    with either ``HI_MAGIC_32`` or ``HI_MAGIC_64`` and embed exactly the
    cached version string.

    Truncated or otherwise malformed headers yield False rather than an
    exception.  ``OSError`` from opening the file still propagates.
    """
    if not HiFile.RE_FILE_EXTENSION.search(file.name):
        return False

    if not hasattr(HiFile, 'hi_version'):
        try:
            with profile('command', 'ghc'):
                output = subprocess.check_output(
                    ['ghc', '--numeric-version'],
                )
            # ValueError covers undecodable output, non-numeric components
            # and a component count other than three.
            major, minor, patch = [
                int(x) for x in output.decode('utf-8').strip().split('.')
            ]
        except (OSError, subprocess.CalledProcessError, ValueError):
            HiFile.hi_version = None
            logger.debug("Unable to read GHC version")
        else:
            HiFile.hi_version = '%d%02d%d' % (major, minor, patch)
            logger.debug("Found .hi version %s", HiFile.hi_version)

    if HiFile.hi_version is None:
        return False

    with open(file.path, 'rb') as fp:
        # Read magic; both the 32-bit and the 64-bit variant are accepted,
        # matching the log message and the skip logic below.
        buf = fp.read(4)
        if buf not in (HI_MAGIC_32, HI_MAGIC_64):
            logger.debug(
                "Haskell interface magic mismatch. "
                "Found %r instead of %r or %r",
                buf,
                HI_MAGIC_32,
                HI_MAGIC_64,
            )
            return False

        try:
            # Skip some old descriptor thingy that has varying size
            if buf == HI_MAGIC_32:
                fp.read(4)
            elif buf == HI_MAGIC_64:
                fp.read(8)

            # Read version, which is [Char]
            buf = fp.read(1)

            # Small list optimisation - anything less than 0xff has its
            # length in a single byte; everything else is 0xff followed by
            # the 32-bit length (big-endian).
            if buf[0] == 0xff:
                buf = fp.read(4)
                length = struct.unpack('>I', buf)[0]
            else:
                length = buf[0]

            # Now read characters; each is 32-bit big-endian.
            version_found = ''.join(
                chr(struct.unpack('>I', fp.read(4))[0])
                for _ in range(length)
            )
        except (IndexError, struct.error, ValueError):
            # Short reads (truncated file) or code points outside the
            # Unicode range: this is not a usable interface file.
            logger.debug("Haskell interface header is truncated or malformed.")
            return False

    if version_found != HiFile.hi_version:
        logger.debug(
            "Haskell version mismatch; found %s instead of %s.",
            version_found,
            HiFile.hi_version,
        )
        return False

    return True
def __del__(self):
    """Close the archive when the object is finalized, under profiling."""
    with profile('close_archive', self):
        self.close_archive()
def __init__(self, *args, **kwargs):
    """Initialise the parent class, then open the archive into ``self._archive``.

    All positional and keyword arguments are forwarded to the parent
    initialiser unchanged.
    """
    super().__init__(*args, **kwargs)
    with profile('open_archive', self):
        archive = self.open_archive()
        self._archive = archive