def check_version_conflicts(dependency_set, cache=None):
    """
    Check for version conflicts in a dependency set.

    :param dependency_set: A list of filenames (strings) of ``*.deb`` files.
    :param cache: The :class:`.PackageCache` to use (defaults to :data:`None`).
    :raises: :exc:`VersionConflictFound` when one or more version conflicts
             are found.

    For each Debian binary package archive given this checks whether a newer
    version of the same package exists in the same repository (directory).
    The analysis can be very slow; a :class:`.PackageCache` speeds it up.
    """
    timer = Timer()
    report = []
    # Resolve every filename to a parsed package archive object.
    archives = [parse_filename(pathname) for pathname in dependency_set]
    spinner = Spinner(total=len(archives))
    logger.info("Checking for version conflicts in %i package(s) ..", len(archives))
    for position, archive in enumerate(archives, start=1):
        superseding = archive.newer_versions
        if superseding:
            report.append(compact(
                """
                Dependency set includes {dependency} but newer version(s)
                of that package also exist and will take precedence:
                """,
                dependency=format_path(archive.filename)))
            listing = [" - %s" % format_path(other.filename)
                       for other in sorted(superseding)]
            report.append("\n".join(listing))
        spinner.step(label="Checking for version conflicts", progress=position)
    spinner.clear()
    if not report:
        logger.info("No version conflicts found (took %s).", timer)
        return
    report.insert(0, "One or more version conflicts found:")
    raise VersionConflictFound('\n\n'.join(report))
def scan_packages(repository, packages_file=None, cache=None):
    """
    A reimplementation of the ``dpkg-scanpackages -m`` command in Python.

    Updates a ``Packages`` file based on the Debian package archive(s) found
    in the given directory. Uses :py:class:`.PackageCache` to (optionally)
    speed up the process significantly by caching package metadata and hashes
    on disk, which is why this can be much faster than
    ``dpkg-scanpackages -m``.

    :param repository: The pathname of a directory containing Debian package
                       archives (a string).
    :param packages_file: The pathname of the ``Packages`` file to update (a
                          string). Defaults to the ``Packages`` file in the
                          given directory.
    :param cache: The :py:class:`.PackageCache` to use (defaults to ``None``).
    """
    # Fall back to the `Packages' file inside the repository directory.
    packages_file = packages_file or os.path.join(repository, 'Packages')
    timer = Timer()
    found_archives = glob.glob(os.path.join(repository, '*.deb'))
    archive_count = len(found_archives)
    progress = Spinner(total=archive_count)
    with open(packages_file, 'wb') as output:
        for position, archive in enumerate(optimize_order(found_archives), start=1):
            # Merge the control file fields with the repository entry fields
            # (size, checksums, filename) into a single mapping.
            entry = dict(inspect_package_fields(archive, cache=cache))
            entry.update(get_packages_entry(archive, cache=cache))
            control_fields = unparse_control_fields(entry)
            control_fields.dump(output)
            output.write(b'\n')
            progress.step(label="Scanning package metadata", progress=position)
    progress.clear()
    logger.debug("Wrote %i entries to output Packages file in %s.", archive_count, timer)
def check_version_conflicts(dependency_set, cache=None):
    """
    Check for version conflicts in a dependency set.

    For each Debian binary package archive given, check if a newer version of
    the same package exists in the same repository (directory). This analysis
    can be very slow. To make it faster you can use the
    :py:class:`.PackageCache`.

    :param dependency_set: A list of filenames (strings) of ``*.deb`` files.
    :param cache: The :py:class:`.PackageCache` to use (defaults to ``None``).
    :raises: :py:class:`VersionConflictFound` when one or more version
             conflicts are found.
    """
    timer = Timer()
    parsed_archives = list(map(parse_filename, dependency_set))
    total = len(parsed_archives)
    spinner = Spinner(total=total)
    logger.info("Checking for version conflicts in %i package(s) ..", total)
    conflict_summary = []
    for index, candidate in enumerate(parsed_archives, start=1):
        newer = candidate.newer_versions
        if newer:
            conflict_summary.append(compact(
                """
                Dependency set includes {dependency} but newer version(s)
                of that package also exist and will take precedence:
                """,
                dependency=format_path(candidate.filename)))
            conflict_summary.append("\n".join(
                " - %s" % format_path(newer_archive.filename)
                for newer_archive in sorted(newer)))
        spinner.step(label="Checking for version conflicts", progress=index)
    spinner.clear()
    if conflict_summary:
        conflict_summary.insert(0, "One or more version conflicts found:")
        raise VersionConflictFound('\n\n'.join(conflict_summary))
    logger.info("No version conflicts found (took %s).", timer)
def scan_packages(repository, packages_file=None, cache=None):
    """
    A reimplementation of the ``dpkg-scanpackages -m`` command in Python.

    Updates a ``Packages`` file based on the Debian package archive(s) found
    in the given directory. Uses :class:`.PackageCache` to (optionally) speed
    up the process significantly by caching package metadata and hashes on
    disk. This explains why this function can be much faster than
    ``dpkg-scanpackages -m``.

    :param repository: The pathname of a directory containing Debian package
                       archives (a string).
    :param packages_file: The pathname of the ``Packages`` file to update (a
                          string). Defaults to the ``Packages`` file in the
                          given directory.
    :param cache: The :class:`.PackageCache` to use (defaults to
                  :data:`None`).
    """
    if not packages_file:
        # By default the `Packages' file inside the repository is updated.
        packages_file = os.path.join(repository, 'Packages')
    timer = Timer()
    archives = glob.glob(os.path.join(repository, '*.deb'))
    num_archives = len(archives)
    spinner = Spinner(total=num_archives)
    with open(packages_file, 'wb') as handle:
        for counter, pathname in enumerate(optimize_order(archives), start=1):
            # Collect control fields and repository entry fields and render
            # them as one deb822 paragraph per package archive.
            merged_fields = dict(inspect_package_fields(pathname, cache=cache))
            merged_fields.update(get_packages_entry(pathname, cache=cache))
            unparse_control_fields(merged_fields).dump(handle)
            handle.write(b'\n')
            spinner.step(label="Scanning package metadata", progress=counter)
    spinner.clear()
    logger.debug("Wrote %i entries to output Packages file in %s.", num_archives, timer)
def check_duplicate_files(dependency_set, cache=None):
    """
    Check a collection of Debian package archives for conflicts.

    Looks for duplicate files in unrelated package archives. Ignores groups
    of packages that have marked each other as conflicting via a common
    virtual package named in both their 'Conflicts' and 'Provides' fields.
    Other variants of 'Conflicts' are not supported yet.

    Because this analysis involves both the package control file fields and
    the pathnames of files installed by packages it can be slow. To make it
    faster you can use the :py:class:`.PackageCache`.

    :param dependency_set: A list of filenames (strings) of ``*.deb`` files.
    :param cache: The :py:class:`.PackageCache` to use (defaults to ``None``).
    :raises: :py:class:`exceptions.ValueError` when less than two package
             archives are given (the duplicate check obviously only works if
             there are packages to compare :-).
    :raises: :py:class:`DuplicateFilesFound` when duplicate files are found
             within a group of package archives.
    """
    timer = Timer()
    dependency_set = list(map(parse_filename, dependency_set))
    # Make sure we have something useful to work with.
    num_archives = len(dependency_set)
    if num_archives < 2:
        msg = "To check for duplicate files you need to provide two or more packages archives! (%i given)"
        raise ValueError(msg % num_archives)
    # Build up a global map of all files contained in the given package archives.
    # Maps each installed pathname to the set of archives that contain it.
    global_contents = collections.defaultdict(set)
    # Maps each archive filename to its parsed control file fields.
    global_fields = {}
    spinner = Spinner(total=num_archives)
    logger.info("Checking for duplicate files in %i package archives ..", num_archives)
    for i, archive in enumerate(optimize_order(dependency_set), start=1):
        spinner.step(label="Scanning %i package archives" % num_archives, progress=i)
        fields, contents = inspect_package(archive.filename, cache=cache)
        global_fields[archive.filename] = fields
        for pathname, stat in contents.items():
            # Directories can legitimately be shared between packages,
            # so only regular entries are considered for conflicts.
            if not stat.permissions.startswith('d'):
                global_contents[pathname].add(archive)
    spinner.clear()
    # Count the number of duplicate files between sets of conflicting packages
    # for more user friendly reporting.
    duplicate_files = collections.defaultdict(
        lambda: dict(count=0, filenames=[]))
    for pathname, packages in global_contents.items():
        if len(packages) > 1:
            # Override the sort key to be the filename because we don't need
            # to properly sort by version (which is slow on large collections).
            key = tuple(sorted(packages, key=lambda p: p.filename))
            duplicate_files[key]['count'] += 1
            duplicate_files[key]['filenames'].append(pathname)
    # Filter out groups that are not real conflicts. Iterating over sorted()
    # (a materialized copy) makes it safe to pop() from duplicate_files here.
    for packages, information in sorted(duplicate_files.items()):
        # Never report multiple versions of the same package.
        if len(set(package.name for package in packages)) == 1:
            duplicate_files.pop(packages)
            continue
        # We check for one common case where it's easy to guarantee that
        # we're not dealing with broken packages: All of the packages have
        # marked each other as conflicting via the combination of the
        # fields `Provides:' and `Conflicts:'.

        def find_virtual_name(field_name):
            # Return the single virtual package name shared by the given
            # field of every package in the group, or None when any package
            # lacks the field or the names don't agree.
            package_names = set()
            for archive in packages:
                field = global_fields[archive.filename].get(field_name)
                if field:
                    package_names |= field.names
                else:
                    return
            if len(package_names) == 1:
                return list(package_names)[0]

        marked_conflicts = find_virtual_name('Conflicts')
        marked_provides = find_virtual_name('Provides')
        if marked_conflicts and marked_conflicts == marked_provides:
            duplicate_files.pop(packages)
    # Boring string formatting, trying to find a way to clearly present conflicts.
    summary = []
    for packages, information in sorted(duplicate_files.items()):
        block = []
        conflicts = pluralize(information['count'], 'conflict', 'conflicts')
        block.append("Found %s between %i packages:\n" % (conflicts, len(packages)))
        for i, package in enumerate(sorted(packages), start=1):
            block.append(" %i. %s\n" % (i, package.filename))
        block.append("These packages contain %s:\n" % conflicts)
        for i, filename in enumerate(sorted(information['filenames']), start=1):
            block.append(" %i. %s\n" % (i, filename))
        summary.append(''.join(block))
    if summary:
        archives_involved = set(
            itertools.chain.from_iterable(duplicate_files.keys()))
        files = pluralize(len(duplicate_files), 'duplicate file', 'duplicate files')
        archives = pluralize(len(archives_involved), 'package archive', 'package archives')
        summary.insert(0, "Found %s in %s!\n" % (files, archives))
        summary.append(
            compact("""
            Hint: If the package contents are correct you can resolve these
            conflicts by marking the packages as conflicting. You do this by
            adding the 'Conflicts' and 'Provides' fields and setting them to a
            common value. That should silence this message.
            """))
        delimiter = '%s\n' % ('-' * 79)
        raise DuplicateFilesFound(delimiter.join(summary))
    else:
        logger.info("No conflicting files found (took %s).", timer)
def build_binary_dist_helper(self, requirement, setup_command):
    """
    Convert an unpacked source distribution to a binary distribution.

    :param requirement: A :class:`.Requirement` object.
    :param setup_command: A list of strings with the arguments to
                          ``setup.py``.
    :returns: The pathname of the resulting binary distribution (a string).
    :raises: :exc:`.BuildFailed` when the build reports an error (e.g.
             because of missing binary dependencies like system libraries).
    :raises: :exc:`.NoBuildOutput` when the build does not produce the
             expected binary distribution archive.
    """
    build_timer = Timer()
    # Make sure the source distribution contains a setup script.
    setup_script = os.path.join(requirement.source_directory, 'setup.py')
    if not os.path.isfile(setup_script):
        msg = "Directory %s (%s %s) doesn't contain a source distribution!"
        raise InvalidSourceDistribution(msg % (requirement.source_directory, requirement.name, requirement.version))
    # Let the user know what's going on.
    build_text = "Building %s binary distribution" % requirement
    logger.info("%s ..", build_text)
    # Cleanup previously generated distributions.
    dist_directory = os.path.join(requirement.source_directory, 'dist')
    if os.path.isdir(dist_directory):
        logger.debug("Cleaning up previously generated distributions in %s ..", dist_directory)
        shutil.rmtree(dist_directory)
    # Let the user know (approximately) which command is being executed
    # (I don't think it's necessary to show them the nasty details :-).
    logger.debug("Executing external command: %s",
                 ' '.join(map(pipes.quote, [self.config.python_executable, 'setup.py'] + setup_command)))
    # Compose the command line needed to build the binary distribution.
    # This nasty command line forces the use of setuptools (instead of
    # distutils) just like pip does. This will cause the `*.egg-info'
    # metadata to be written to a directory instead of a file, which
    # (amongst other things) enables tracking of installed files.
    command_line = [
        self.config.python_executable, '-c', ';'.join([
            'import setuptools',
            '__file__=%r' % setup_script,
            r"exec(compile(open(__file__).read().replace('\r\n', '\n'), __file__, 'exec'))",
        ])
    ] + setup_command
    # Redirect all output of the build to a temporary file.
    fd, temporary_file = tempfile.mkstemp()
    try:
        # Start the build. Standard output and standard error both go to the
        # temporary file so they can be reported on failure.
        build = subprocess.Popen(command_line, cwd=requirement.source_directory, stdout=fd, stderr=fd)
        # Wait for the build to finish and provide feedback to the user in the mean time.
        spinner = Spinner(label=build_text, timer=build_timer)
        while build.poll() is None:
            spinner.step()
            # Don't tax the CPU too much.
            time.sleep(0.2)
        spinner.clear()
        # Make sure the build succeeded and produced a binary distribution archive.
        try:
            # If the build reported an error we'll try to provide the user with
            # some hints about what went wrong.
            if build.returncode != 0:
                raise BuildFailed("Failed to build {name} ({version}) binary distribution!",
                                  name=requirement.name, version=requirement.version)
            # Check if the build created the `dist' directory (the os.listdir()
            # call below will raise an exception if we don't check for this).
            if not os.path.isdir(dist_directory):
                raise NoBuildOutput("Build of {name} ({version}) did not produce a binary distribution archive!",
                                    name=requirement.name, version=requirement.version)
            # Check if we can find the binary distribution archive.
            filenames = os.listdir(dist_directory)
            if len(filenames) != 1:
                variables = dict(name=requirement.name,
                                 version=requirement.version,
                                 filenames=concatenate(sorted(filenames)))
                raise NoBuildOutput("""
                    Build of {name} ({version}) produced more than one
                    distribution archive! (matches: {filenames})
                """, **variables)
        except Exception as e:
            # Decorate the exception with the output of the failed build so
            # the caller sees the build log without digging for a file.
            with open(temporary_file) as handle:
                build_output = handle.read()
            enhanced_message = compact("""
                {message}

                Please check the build output because it will probably
                provide a hint about what went wrong.

                Build output:

                {output}
            """, message=e.args[0], output=build_output.strip())
            e.args = (enhanced_message,)
            raise
        logger.info("Finished building %s in %s.", requirement.name, build_timer)
        return os.path.join(dist_directory, filenames[0])
    finally:
        # Close file descriptor before removing the temporary file.
        # Without closing Windows is complaining that the file cannot
        # be removed because it is used by another process.
        os.close(fd)
        os.unlink(temporary_file)
def build_binary_dist(package, version, directory, python='/usr/bin/python'):
    """
    Convert a single, unpacked source distribution to a binary distribution.

    Raises an exception if it fails to create the binary distribution
    (probably because of missing binary dependencies like system libraries).

    :param package: The name of the requirement to build.
    :param version: The version of the requirement to build.
    :param directory: The directory where the unpacked sources of the
                      requirement are available.
    :param python: The pathname of the Python executable to use to run
                   ``setup.py`` (obviously this should point to a working
                   Python installation).
    :returns: The pathname of the resulting binary distribution (a string).
    """
    build_timer = Timer()
    # Make sure the source distribution contains a setup script.
    setup_script = os.path.join(directory, 'setup.py')
    if not os.path.isfile(setup_script):
        msg = "Directory %s (%s %s) doesn't contain a source distribution!"
        # Fixed: Python 2 only `raise Exc, arg' statement syntax replaced
        # with the call syntax that works on both Python 2 and Python 3.
        raise InvalidSourceDistribution(msg % (directory, package, version))
    # Let the user know what's going on.
    build_text = "Building binary distribution of %s (%s) .." % (package, version)
    logger.info("%s", build_text)
    # Cleanup previously generated distributions.
    dist_directory = os.path.join(directory, 'dist')
    if os.path.isdir(dist_directory):
        logger.debug("Cleaning up previously generated distributions in %s ..", dist_directory)
        shutil.rmtree(dist_directory)
    # Compose the command line needed to build the binary distribution.
    command_line = '%s setup.py bdist_dumb --format=tar' % pipes.quote(python)
    logger.debug("Executing external command: %s", command_line)
    # Redirect all output of the build to a temporary file.
    fd, temporary_file = tempfile.mkstemp()
    command_line = '%s > "%s" 2>&1' % (command_line, temporary_file)
    try:
        # Start the build.
        build = subprocess.Popen(['sh', '-c', command_line], cwd=directory)
        # Wait for build to finish, provide feedback to the user in the mean time.
        spinner = Spinner(build_text)
        while build.poll() is None:
            spinner.step()
            time.sleep(0.1)
        spinner.clear()
        # Check whether the build succeeded.
        if build.returncode != 0:
            # If it didn't we'll provide the user with some hints as to what went wrong.
            msg = "Failed to build binary distribution of %s (%s)!" % (package, version)
            logger.error("%s", msg)
            with open(temporary_file) as handle:
                logger.info("Build output (will probably provide a hint as to what went wrong):\n%s", handle.read())
            raise BuildFailed(msg)
        # Check if we can find the binary distribution archive.
        filenames = os.listdir(dist_directory)
        if len(filenames) != 1:
            msg = "Build process did not result in one binary distribution! (matches: %s)"
            raise NoBuildOutput(msg % filenames)
        logger.info("Finished building %s (%s) in %s.", package, version, build_timer)
        return os.path.join(dist_directory, filenames[0])
    finally:
        # Fixed: close the file descriptor returned by tempfile.mkstemp()
        # before removing the file; previously it leaked (and on Windows an
        # open descriptor prevents removal of the file).
        os.close(fd)
        os.unlink(temporary_file)
def build_binary_dist_helper(self, requirement, setup_command):
    """
    Convert a single, unpacked source distribution to a binary distribution.

    Raises an exception if it fails to create the binary distribution
    (probably because of missing binary dependencies like system libraries).

    :param requirement: A :py:class:`.Requirement` object.
    :param setup_command: A list of strings with the arguments to
                          ``setup.py``.
    :returns: The pathname of the resulting binary distribution (a string).
    :raises: :py:exc:`.BuildFailed` when the build reports an error.
    :raises: :py:exc:`.NoBuildOutput` when the build does not produce the
             expected binary distribution archive.
    """
    build_timer = Timer()
    # Make sure the source distribution contains a setup script.
    setup_script = os.path.join(requirement.source_directory, 'setup.py')
    if not os.path.isfile(setup_script):
        msg = "Directory %s (%s %s) doesn't contain a source distribution!"
        raise InvalidSourceDistribution(msg % (requirement.source_directory, requirement.name, requirement.version))
    # Let the user know what's going on.
    build_text = "Building %s (%s) binary distribution" % (requirement.name, requirement.version)
    logger.info("%s ..", build_text)
    # Cleanup previously generated distributions.
    dist_directory = os.path.join(requirement.source_directory, 'dist')
    if os.path.isdir(dist_directory):
        logger.debug("Cleaning up previously generated distributions in %s ..", dist_directory)
        shutil.rmtree(dist_directory)
    # Compose the command line needed to build the binary distribution.
    command_line = ' '.join(pipes.quote(t) for t in [self.config.python_executable, 'setup.py'] + setup_command)
    logger.debug("Executing external command: %s", command_line)
    # Redirect all output of the build to a temporary file.
    fd, temporary_file = tempfile.mkstemp()
    command_line = '%s > "%s" 2>&1' % (command_line, temporary_file)
    try:
        # Start the build.
        build = subprocess.Popen(['sh', '-c', command_line], cwd=requirement.source_directory)
        # Wait for the build to finish and provide feedback to the user in the mean time.
        spinner = Spinner(label=build_text, timer=build_timer)
        while build.poll() is None:
            spinner.step()
            # Don't tax the CPU too much.
            time.sleep(0.2)
        spinner.clear()
        # Make sure the build succeeded and produced a binary distribution archive.
        try:
            # If the build reported an error we'll try to provide the user with
            # some hints about what went wrong.
            if build.returncode != 0:
                raise BuildFailed("Failed to build {name} ({version}) binary distribution!",
                                  name=requirement.name, version=requirement.version)
            # Check if the build created the `dist' directory (the os.listdir()
            # call below will raise an exception if we don't check for this).
            if not os.path.isdir(dist_directory):
                raise NoBuildOutput("Build of {name} ({version}) did not produce a binary distribution archive!",
                                    name=requirement.name, version=requirement.version)
            # Check if we can find the binary distribution archive.
            filenames = os.listdir(dist_directory)
            if len(filenames) != 1:
                raise NoBuildOutput("Build of {name} ({version}) produced more than one distribution archive! (matches: {filenames})",
                                    name=requirement.name, version=requirement.version,
                                    filenames=concatenate(sorted(filenames)))
        except Exception as e:
            # Decorate the exception with the output of the failed build.
            with open(temporary_file) as handle:
                build_output = handle.read()
            enhanced_message = compact("""
                {message}

                Please check the build output because it will probably
                provide a hint about what went wrong.

                Build output:

                {output}
            """, message=e.args[0], output=build_output.strip())
            e.args = (enhanced_message,)
            raise
        logger.info("Finished building %s (%s) in %s.", requirement.name, requirement.version, build_timer)
        return os.path.join(dist_directory, filenames[0])
    finally:
        # Fixed: the file descriptor returned by tempfile.mkstemp() was never
        # closed, leaking one descriptor per build (and preventing removal of
        # the temporary file on Windows). Close it before unlinking.
        os.close(fd)
        os.unlink(temporary_file)
def collect_related_packages(filename, cache=None):
    """
    Collect the package archive(s) related to the given package archive.

    This works by parsing and resolving the dependencies of the given package
    to filenames of package archives, then parsing and resolving the
    dependencies of those package archives, etc. until no more relationships
    can be resolved to existing package archives.

    :param filename: The filename of an existing ``*.deb`` archive (a string).
    :param cache: The :py:class:`.PackageCache` to use (defaults to ``None``).
    :returns: A list of :py:class:`PackageFile` objects.

    This function is used to implement the ``deb-pkg-tools --collect``
    command:

    .. code-block:: sh

       $ deb-pkg-tools -c /tmp python-deb-pkg-tools_1.13-1_all.deb

    .. note:: The implementation of this function can be somewhat slow when
              you're dealing with a lot of packages, but this function is
              meant to be used interactively so I don't think it will be a
              big issue.
    """
    filename = os.path.abspath(filename)
    logger.info("Collecting packages related to %s ..", format_path(filename))
    # Internal state.
    relationship_sets = set()
    packages_to_scan = [filename]
    related_packages = collections.defaultdict(list)
    # Preparations: all candidate archives live in the same directory as the
    # given archive.
    available_packages = find_package_archives(os.path.dirname(filename))
    # Loop to collect the related packages.
    num_scanned_packages = 0
    # Fixed: use floor division so the Spinner total is an integer; on
    # Python 3 the old `/ 2' produced a float. (The total is only a rough
    # progress estimate.)
    spinner = Spinner(total=len(available_packages) // 2)
    while packages_to_scan:
        filename = packages_to_scan.pop(0)
        logger.debug("Scanning %s ..", format_path(filename))
        # Find the relationships of the given package.
        fields = inspect_package_fields(filename, cache=cache)
        if 'Depends' in fields:
            relationship_sets.add(fields['Depends'])
        # Collect all related packages from the given directory.
        packages_to_skip = []
        for package in available_packages:
            # Three-valued match: True (all relationship sets match), False
            # (some relationship set explicitly excludes the package) or
            # None (no relationship set mentions the package).
            package_matches = None
            for relationships in relationship_sets:
                status = relationships.matches(package.name, package.version)
                if status is True and package_matches is not False:
                    package_matches = True
                elif status is False:
                    package_matches = False
                    break
            if package_matches is True:
                logger.debug("Package archive matched all relationships: %s", package.filename)
                related_packages[package.name].append(package)
                packages_to_scan.append(package.filename)
                packages_to_skip.append(package)
            elif package_matches is False:
                # If we are sure we can exclude a package from all further
                # iterations, it's worth it to speed up the process on big
                # dependency sets.
                packages_to_skip.append(package)
        spinner.step(label="Collecting related packages", progress=num_scanned_packages)
        for package in packages_to_skip:
            available_packages.remove(package)
        num_scanned_packages += 1
    spinner.clear()
    # Pick the latest version of the collected packages.
    return list(map(find_latest_version, related_packages.values()))
def build_binary_dist(package, version, directory, python='/usr/bin/python'):
    """
    Convert a single, unpacked source distribution to a binary distribution.

    Raises an exception if it fails to create the binary distribution
    (probably because of missing binary dependencies like system libraries).

    :param package: The name of the requirement to build.
    :param version: The version of the requirement to build.
    :param directory: The directory where the unpacked sources of the
                      requirement are available.
    :param python: The pathname of the Python executable to use to run
                   ``setup.py`` (obviously this should point to a working
                   Python installation).
    :returns: The pathname of the resulting binary distribution (a string).
    """
    build_timer = Timer()
    # Make sure the source distribution contains a setup script.
    setup_script = os.path.join(directory, 'setup.py')
    if not os.path.isfile(setup_script):
        msg = "Directory %s (%s %s) doesn't contain a source distribution!"
        raise InvalidSourceDistribution(msg % (directory, package, version))
    # Let the user know what's going on.
    build_text = "Building binary distribution of %s (%s) .." % (package, version)
    logger.info("%s", build_text)
    # Cleanup previously generated distributions.
    dist_directory = os.path.join(directory, 'dist')
    if os.path.isdir(dist_directory):
        logger.debug("Cleaning up previously generated distributions in %s ..", dist_directory)
        shutil.rmtree(dist_directory)
    # Compose the command line needed to build the binary distribution.
    command_line = '%s setup.py bdist_dumb --format=tar' % pipes.quote(python)
    logger.debug("Executing external command: %s", command_line)
    # Redirect all output of the build to a temporary file.
    fd, temporary_file = tempfile.mkstemp()
    command_line = '%s > "%s" 2>&1' % (command_line, temporary_file)
    try:
        # Start the build.
        build = subprocess.Popen(['sh', '-c', command_line], cwd=directory)
        # Wait for build to finish, provide feedback to the user in the mean time.
        spinner = Spinner(build_text)
        while build.poll() is None:
            spinner.step()
            time.sleep(0.1)
        spinner.clear()
        # Check whether the build succeeded.
        if build.returncode != 0:
            # If it didn't we'll provide the user with some hints as to what went wrong.
            msg = "Failed to build binary distribution of %s (%s)!" % (package, version)
            logger.error("%s", msg)
            with open(temporary_file) as handle:
                logger.info(
                    "Build output (will probably provide a hint as to what went wrong):\n%s",
                    handle.read())
            raise BuildFailed(msg)
        # Check if we can find the binary distribution archive.
        filenames = os.listdir(dist_directory)
        if len(filenames) != 1:
            msg = "Build process did not result in one binary distribution! (matches: %s)"
            raise NoBuildOutput(msg % filenames)
        logger.info("Finished building %s (%s) in %s.", package, version, build_timer)
        return os.path.join(dist_directory, filenames[0])
    finally:
        # Fixed: close the descriptor returned by tempfile.mkstemp() before
        # unlinking; it was previously leaked on every call (and an open
        # descriptor prevents removing the file on Windows).
        os.close(fd)
        os.unlink(temporary_file)
def build_binary_dist_helper(self, requirement, setup_command):
    """
    Convert an unpacked source distribution to a binary distribution.

    :param requirement: A :class:`.Requirement` object.
    :param setup_command: A list of strings with the arguments to
                          ``setup.py``.
    :returns: The pathname of the resulting binary distribution (a string).
    :raises: :exc:`.BuildFailed` when the build reports an error (e.g.
             because of missing binary dependencies like system libraries).
    :raises: :exc:`.NoBuildOutput` when the build does not produce the
             expected binary distribution archive.
    """
    build_timer = Timer()
    # Make sure the source distribution contains a setup script.
    setup_script = os.path.join(requirement.source_directory, 'setup.py')
    if not os.path.isfile(setup_script):
        msg = "Directory %s (%s %s) doesn't contain a source distribution!"
        raise InvalidSourceDistribution(
            msg % (requirement.source_directory, requirement.name, requirement.version))
    # Let the user know what's going on.
    build_text = "Building %s binary distribution" % requirement
    logger.info("%s ..", build_text)
    # Cleanup previously generated distributions.
    dist_directory = os.path.join(requirement.source_directory, 'dist')
    if os.path.isdir(dist_directory):
        logger.debug(
            "Cleaning up previously generated distributions in %s ..",
            dist_directory)
        shutil.rmtree(dist_directory)
    # Let the user know (approximately) which command is being executed
    # (I don't think it's necessary to show them the nasty details :-).
    logger.debug(
        "Executing external command: %s",
        ' '.join(
            map(pipes.quote,
                [self.config.python_executable, 'setup.py'] + setup_command)))
    # Compose the command line needed to build the binary distribution.
    # This nasty command line forces the use of setuptools (instead of
    # distutils) just like pip does. This will cause the `*.egg-info'
    # metadata to be written to a directory instead of a file, which
    # (amongst other things) enables tracking of installed files.
    command_line = [
        self.config.python_executable, '-c', ';'.join([
            'import setuptools',
            '__file__=%r' % setup_script,
            r"exec(compile(open(__file__).read().replace('\r\n', '\n'), __file__, 'exec'))",
        ])
    ] + setup_command
    # Redirect all output of the build to a temporary file.
    fd, temporary_file = tempfile.mkstemp()
    try:
        # Start the build; both stdout and stderr of the subprocess go to
        # the temporary file so they can be reported when the build fails.
        build = subprocess.Popen(command_line, cwd=requirement.source_directory, stdout=fd, stderr=fd)
        # Wait for the build to finish and provide feedback to the user in the mean time.
        spinner = Spinner(label=build_text, timer=build_timer)
        while build.poll() is None:
            spinner.step()
            # Don't tax the CPU too much.
            time.sleep(0.2)
        spinner.clear()
        # Make sure the build succeeded and produced a binary distribution archive.
        try:
            # If the build reported an error we'll try to provide the user with
            # some hints about what went wrong.
            if build.returncode != 0:
                raise BuildFailed(
                    "Failed to build {name} ({version}) binary distribution!",
                    name=requirement.name, version=requirement.version)
            # Check if the build created the `dist' directory (the os.listdir()
            # call below will raise an exception if we don't check for this).
            if not os.path.isdir(dist_directory):
                raise NoBuildOutput(
                    "Build of {name} ({version}) did not produce a binary distribution archive!",
                    name=requirement.name, version=requirement.version)
            # Check if we can find the binary distribution archive.
            filenames = os.listdir(dist_directory)
            if len(filenames) != 1:
                variables = dict(name=requirement.name,
                                 version=requirement.version,
                                 filenames=concatenate(sorted(filenames)))
                raise NoBuildOutput(
                    """
                    Build of {name} ({version}) produced more than one
                    distribution archive! (matches: {filenames})
                    """, **variables)
        except Exception as e:
            # Decorate the exception with the output of the failed build so
            # the caller gets the build log along with the error message.
            with open(temporary_file) as handle:
                build_output = handle.read()
            enhanced_message = compact("""
                {message}

                Please check the build output because it will probably
                provide a hint about what went wrong.

                Build output:

                {output}
            """, message=e.args[0], output=build_output.strip())
            e.args = (enhanced_message, )
            raise
        logger.info("Finished building %s in %s.", requirement.name, build_timer)
        return os.path.join(dist_directory, filenames[0])
    finally:
        # Close file descriptor before removing the temporary file.
        # Without closing Windows is complaining that the file cannot
        # be removed because it is used by another process.
        os.close(fd)
        os.unlink(temporary_file)
def check_duplicate_files(dependency_set, cache=None):
    """
    Check a collection of Debian package archives for conflicts.

    :param dependency_set: A list of filenames (strings) of ``*.deb`` files.
    :param cache: The :class:`.PackageCache` to use (defaults to :data:`None`).
    :raises: :exc:`exceptions.ValueError` when less than two package archives
             are given (the duplicate check obviously only works if there are
             packages to compare :-).
    :raises: :exc:`DuplicateFilesFound` when duplicate files are found within
             a group of package archives.

    This check looks for duplicate files in package archives that concern
    different packages. Ignores groups of packages that have their 'Provides'
    and 'Conflicts' fields set to a common value. Other variants of
    'Conflicts' are not supported yet.

    Because this analysis involves both the package control file fields and
    the pathnames of files installed by packages it can be really slow. To
    make it faster you can use the :class:`.PackageCache`.
    """
    timer = Timer()
    dependency_set = list(map(parse_filename, dependency_set))
    # Make sure we have something useful to work with.
    num_archives = len(dependency_set)
    if num_archives < 2:
        msg = "To check for duplicate files you need to provide two or more packages archives! (%i given)"
        raise ValueError(msg % num_archives)
    # Build up a global map of all files contained in the given package
    # archives: pathname -> set of archives that contain that pathname.
    global_contents = collections.defaultdict(set)
    # Control fields per archive, keyed by archive filename (used below to
    # look up the `Provides:' / `Conflicts:' fields of each group member).
    global_fields = {}
    spinner = Spinner(total=num_archives)
    logger.info("Checking for duplicate files in %i package archives ..", num_archives)
    for i, archive in enumerate(optimize_order(dependency_set), start=1):
        spinner.step(label="Scanning %i package archives" % num_archives, progress=i)
        fields, contents = inspect_package(archive.filename, cache=cache)
        global_fields[archive.filename] = fields
        for pathname, stat in contents.items():
            # Directories may be shared between packages, so only
            # non-directory entries participate in the duplicate check.
            if not stat.permissions.startswith('d'):
                global_contents[pathname].add(archive)
    spinner.clear()
    # Count the number of duplicate files between sets of conflicting packages
    # for more user friendly reporting.
    duplicate_files = collections.defaultdict(lambda: dict(count=0, filenames=[]))
    for pathname, packages in global_contents.items():
        if len(packages) > 1:
            # Override the sort key to be the filename because we don't need
            # to properly sort by version (which is slow on large collections).
            key = tuple(sorted(packages, key=lambda p: p.filename))
            duplicate_files[key]['count'] += 1
            duplicate_files[key]['filenames'].append(pathname)
    # Filter out groups that aren't genuine conflicts. Iterating over a
    # sorted() snapshot of the items makes it safe to pop() entries from
    # `duplicate_files' inside the loop.
    for packages, information in sorted(duplicate_files.items()):
        # Never report multiple versions of the same package.
        if len(set(package.name for package in packages)) == 1:
            duplicate_files.pop(packages)
            continue
        # We check for one common case where it's easy to guarantee that
        # we're not dealing with broken packages: All of the packages have
        # marked each other as conflicting via the combination of the
        # fields `Provides:' and `Conflicts:'.
        def find_virtual_name(field_name):
            # Return the single virtual package name shared by `field_name'
            # across all packages in the current group, or None when any
            # package lacks the field or the names don't agree on exactly
            # one value. (Closes over `packages' from the enclosing loop.)
            package_names = set()
            for archive in packages:
                field = global_fields[archive.filename].get(field_name)
                if field:
                    package_names |= field.names
                else:
                    return
            if len(package_names) == 1:
                return list(package_names)[0]
        marked_conflicts = find_virtual_name('Conflicts')
        marked_provides = find_virtual_name('Provides')
        if marked_conflicts and marked_conflicts == marked_provides:
            duplicate_files.pop(packages)
    # Boring string formatting, trying to find a way to clearly present conflicts.
    summary = []
    for packages, information in sorted(duplicate_files.items()):
        block = []
        conflicts = pluralize(information['count'], 'conflict', 'conflicts')
        block.append("Found %s between %i packages:\n" % (conflicts, len(packages)))
        for i, package in enumerate(sorted(packages), start=1):
            block.append(" %i. %s\n" % (i, package.filename))
        block.append("These packages contain %s:\n" % conflicts)
        for i, filename in enumerate(sorted(information['filenames']), start=1):
            block.append(" %i. %s\n" % (i, filename))
        summary.append(''.join(block))
    if summary:
        archives_involved = set(itertools.chain.from_iterable(duplicate_files.keys()))
        files = pluralize(len(duplicate_files), 'duplicate file', 'duplicate files')
        archives = pluralize(len(archives_involved), 'package archive', 'package archives')
        summary.insert(0, "Found %s in %s!\n" % (files, archives))
        summary.append(compact("""
            Hint: If the package contents are correct you can resolve these
            conflicts by marking the packages as conflicting. You do this by
            adding the 'Conflicts' and 'Provides' fields and setting them to a
            common value. That should silence this message.
        """))
        delimiter = '%s\n' % ('-' * 79)
        raise DuplicateFilesFound(delimiter.join(summary))
    else:
        logger.info("No conflicting files found (took %s).", timer)
def collect_related_packages_helper(candidate_archives, given_archive, cache):
    """
    Internal helper that enables :py:func:`collect_related_packages()` to
    perform conflict resolution (which would otherwise get very complex).

    :param candidate_archives: A mapping of package names to collections of
                               candidate package archives (one entry per
                               available version). The mapping is deep-copied
                               so the caller's data structure is not mutated.
    :param given_archive: The package archive whose related packages are
                          being collected.
    :param cache: The package metadata cache passed on to
                  :py:func:`inspect_package_fields()`.
    :returns: A list of collected (related) package archives.
    :raises: :py:exc:`CollectedPackagesConflict` when the collected archives
             don't satisfy each other's relationships.
    """
    # Enable mutation of the candidate archives data structure inside the scope
    # of this function without mutating the original data structure.
    candidate_archives = copy.deepcopy(candidate_archives)
    # Prepare some internal state.
    archives_to_scan = [given_archive]
    collected_archives = []
    relationship_sets = set()
    # Loop to collect the related packages.
    spinner = Spinner(label="Collecting related packages", timer=Timer())
    while archives_to_scan:
        selected_archive = archives_to_scan.pop(0)
        logger.debug("Scanning %s ..", format_path(selected_archive.filename))
        # Find the relationships of the given package.
        control_fields = inspect_package_fields(selected_archive.filename, cache)
        for field_name in DEPENDENCY_FIELDS:
            if field_name in control_fields:
                relationship_sets.add(control_fields[field_name])
        # For each group of package archives sharing the same package name ..
        for package_name in sorted(candidate_archives):
            # For each version of the package ..
            for package_archive in list(candidate_archives[package_name]):
                package_matches = match_relationships(package_archive, relationship_sets)
                spinner.step()
                # match_relationships() is tristate (True / False / None), so
                # use identity comparisons per PEP 8 instead of `=='.
                if package_matches is True:
                    logger.debug(
                        "Package archive matched all relationships: %s",
                        package_archive.filename)
                    # Move the selected version of the package archive from the
                    # candidates to the list of selected package archives.
                    collected_archives.append(package_archive)
                    # Prepare to scan and collect dependencies of the selected
                    # package archive in a future iteration of the outermost
                    # (while) loop.
                    archives_to_scan.append(package_archive)
                    # Ignore all other versions of the package inside this call
                    # to collect_related_packages_helper().
                    candidate_archives.pop(package_name)
                    # Break out of the loop to avoid scanning other versions of
                    # this package archive; we've made our choice now.
                    break
                elif package_matches is False:
                    # If we're sure we can exclude this version of the package
                    # from future iterations it could be worth it to speed up
                    # the process on big repositories / dependency sets.
                    candidate_archives[package_name].remove(package_archive)
                    # Keep looking for a match in another version.
                elif package_matches is None:
                    # Break out of the loop that scans multiple versions of the
                    # same package because none of the relationship sets collected
                    # so far reference the name of this package (this is intended
                    # as a harmless optimization).
                    break
    spinner.clear()
    # Check for conflicts in the collected set of related package archives.
    conflicts = [
        a for a in collected_archives
        if not match_relationships(a, relationship_sets)
    ]
    if conflicts:
        raise CollectedPackagesConflict(conflicts)
    else:
        return collected_archives
def build_binary_dist_helper(self, requirement):
    """
    Convert a single, unpacked source distribution to a binary distribution.

    Raises an exception if it fails to create the binary distribution (probably
    because of missing binary dependencies like system libraries).

    :param requirement: A :py:class:`.Requirement` object.
    :returns: The pathname of the resulting binary distribution (a string).
    :raises: :py:exc:`.InvalidSourceDistribution` when the source directory
             doesn't contain a ``setup.py`` script.
    :raises: :py:exc:`.BuildFailed` when the build reports an error.
    :raises: :py:exc:`.NoBuildOutput` when the build does not produce the
             expected binary distribution archive.
    """
    build_timer = Timer()
    # Make sure the source distribution contains a setup script.
    setup_script = os.path.join(requirement.source_directory, 'setup.py')
    if not os.path.isfile(setup_script):
        msg = "Directory %s (%s %s) doesn't contain a source distribution!"
        raise InvalidSourceDistribution(msg % (requirement.source_directory, requirement.name, requirement.version))
    # Let the user know what's going on.
    build_text = "Building %s (%s) binary distribution" % (requirement.name, requirement.version)
    logger.info("%s ..", build_text)
    # Cleanup previously generated distributions.
    dist_directory = os.path.join(requirement.source_directory, 'dist')
    if os.path.isdir(dist_directory):
        logger.debug("Cleaning up previously generated distributions in %s ..", dist_directory)
        shutil.rmtree(dist_directory)
    makedirs(dist_directory)
    # Create a temporary directory for pip installing into, and set up the
    # install_lib directory structure inside it. We do this so that we can
    # pip install into this as our target.
    temporary_dir = tempfile.mkdtemp()
    distutils_inst = install(Distribution())
    distutils_inst.prefix = ''  # This will be changed if we're in a virtualenv.
    distutils_inst.finalize_options()
    pip_target = os.path.normpath(temporary_dir + distutils_inst.install_lib)
    # Compose the command line needed to build the binary distribution.
    command_line = ' '.join(pipes.quote(t) for t in ['pip', 'install', '.', '--target', pip_target])
    logger.debug("Executing external command: %s", command_line)
    # Redirect all output of the build to a temporary file.
    fd, temporary_file = tempfile.mkstemp()
    command_line = '%s > "%s" 2>&1' % (command_line, temporary_file)
    try:
        # Start the build.
        build = subprocess.Popen(['sh', '-c', command_line], cwd=requirement.source_directory)
        # Wait for the build to finish and provide feedback to the user in the mean time.
        spinner = Spinner(label=build_text, timer=build_timer)
        while build.poll() is None:
            spinner.step()
            # Don't tax the CPU too much.
            time.sleep(0.2)
        spinner.clear()
        # At this point, we may have a number of dependencies in the directory we want
        # to tar up that should not be part of the package distribution. For instance,
        # s3 will also wrap up the concurrent, futures, and requests packages. We fix
        # this by reading {name}-{version}-{py-version}.egg-info/installed-files.txt
        # and removing any files or directories that are not in it.
        # 1. Find the appropriate .egg-info/ directory.
        egg_info_dir = None
        egg_info_start = '-'.join([requirement.name, requirement.version])
        for egg_info_root, dirs, _ in os.walk(temporary_dir):
            for d in dirs:
                if d.startswith(egg_info_start) and d.endswith('.egg-info'):
                    egg_info_dir = d
                    break
            if egg_info_dir is not None:
                break
        # 2. If we have a .egg-info/, try to read the installed-files.txt contents.
        # Entries in installed-files.txt are relative to the .egg-info/
        # directory, so resolve them against it.
        inst_files = set()
        if egg_info_dir is not None:
            egg_info_path = os.path.join(egg_info_root, egg_info_dir)
            inst_files_path = os.path.join(egg_info_path, 'installed-files.txt')
            try:
                with open(inst_files_path) as f:
                    for line in f:
                        abs_path = os.path.abspath(os.path.join(egg_info_path, line.strip()))
                        inst_files.add(abs_path)
                        inst_files.add(os.path.dirname(abs_path))
            except IOError as ioe:
                # Bug fix: was `except IOError, ioe:' (Python 2 only syntax)
                # and called the misspelled `loger.warn(...)'.
                logger.warning('Unable to open %s: %s', inst_files_path, ioe)
        # 3. If we were able to get a set of files and directories that belong in the
        # distribution, then we can delete everything else before archiving it.
        if inst_files:
            dirs, files = next(os.walk(egg_info_root))[1:]
            for d in dirs:
                d = os.path.abspath(os.path.join(egg_info_root, d))
                if d not in inst_files:
                    logger.info('Removing %s (not part of the package)' % d)
                    shutil.rmtree(d)
            for f in files:
                f = os.path.abspath(os.path.join(egg_info_root, f))
                if f not in inst_files:
                    logger.info('Removing %s (not part of the package)' % f)
                    os.unlink(f)
        # Tar up the contents of temporary_dir into the correct file name and put it in the dist dir.
        tarball_path = os.path.join(temporary_dir, requirement.name)
        path = archive_util.make_archive(tarball_path, 'gztar', root_dir=temporary_dir)
        shutil.copy(path, dist_directory)
        # Make sure the build succeeded and produced a binary distribution archive.
        try:
            # If the build reported an error we'll try to provide the user with
            # some hints about what went wrong.
            if build.returncode != 0:
                raise BuildFailed(
                    "Failed to build {name} ({version}) binary distribution!",
                    name=requirement.name, version=requirement.version)
            # Check if the build created the `dist' directory (the os.listdir()
            # call below will raise an exception if we don't check for this).
            if not os.path.isdir(dist_directory):
                raise NoBuildOutput(
                    "Build of {name} ({version}) did not produce a binary distribution archive!",
                    name=requirement.name, version=requirement.version)
            # Check if we can find the binary distribution archive.
            filenames = os.listdir(dist_directory)
            if len(filenames) != 1:
                raise NoBuildOutput(
                    "Build of {name} ({version}) produced more than one distribution archive! (matches: {filenames})",
                    name=requirement.name, version=requirement.version,
                    filenames=concatenate(sorted(filenames)))
        except Exception as e:
            # Decorate the exception with the output of the failed build.
            with open(temporary_file) as handle:
                build_output = handle.read()
            enhanced_message = compact("""
                {message}

                Please check the build output because it will probably
                provide a hint about what went wrong.

                Build output:

                {output}
            """, message=e.args[0], output=build_output.strip())
            e.args = (enhanced_message, )
            raise
        logger.info("Finished building %s (%s) in %s.", requirement.name, requirement.version, build_timer)
        return os.path.join(dist_directory, filenames[0])
    finally:
        # Bug fix: the file descriptor returned by mkstemp() and the temporary
        # output file were previously leaked. Close the descriptor before
        # removing the file (Windows refuses to remove an open file),
        # consistent with the cleanup in the sibling build helper.
        os.close(fd)
        os.unlink(temporary_file)
        # The tarball was copied into `dist_directory' above, so the
        # temporary pip target directory is no longer needed.
        shutil.rmtree(temporary_dir, ignore_errors=True)
def collect_related_packages_helper(candidate_archives, given_archive, cache):
    """
    Internal helper that enables :py:func:`collect_related_packages()` to
    perform conflict resolution (which would otherwise get very complex).

    NOTE(review): this definition duplicates an earlier, identical definition
    of collect_related_packages_helper() in this file; at import time the
    later definition shadows the earlier one. Confirm and remove one copy.

    :param candidate_archives: A mapping of package names to collections of
                               candidate package archives (one entry per
                               available version). The mapping is deep-copied
                               so the caller's data structure is not mutated.
    :param given_archive: The package archive whose related packages are
                          being collected.
    :param cache: The package metadata cache passed on to
                  :py:func:`inspect_package_fields()`.
    :returns: A list of collected (related) package archives.
    :raises: :py:exc:`CollectedPackagesConflict` when the collected archives
             don't satisfy each other's relationships.
    """
    # Enable mutation of the candidate archives data structure inside the scope
    # of this function without mutating the original data structure.
    candidate_archives = copy.deepcopy(candidate_archives)
    # Prepare some internal state.
    archives_to_scan = [given_archive]
    collected_archives = []
    relationship_sets = set()
    # Loop to collect the related packages.
    spinner = Spinner(label="Collecting related packages", timer=Timer())
    while archives_to_scan:
        selected_archive = archives_to_scan.pop(0)
        logger.debug("Scanning %s ..", format_path(selected_archive.filename))
        # Find the relationships of the given package.
        control_fields = inspect_package_fields(selected_archive.filename, cache)
        for field_name in DEPENDENCY_FIELDS:
            if field_name in control_fields:
                relationship_sets.add(control_fields[field_name])
        # For each group of package archives sharing the same package name ..
        for package_name in sorted(candidate_archives):
            # For each version of the package ..
            for package_archive in list(candidate_archives[package_name]):
                package_matches = match_relationships(package_archive, relationship_sets)
                spinner.step()
                # match_relationships() is tristate (True / False / None), so
                # use identity comparisons per PEP 8 instead of `=='.
                if package_matches is True:
                    logger.debug("Package archive matched all relationships: %s",
                                 package_archive.filename)
                    # Move the selected version of the package archive from the
                    # candidates to the list of selected package archives.
                    collected_archives.append(package_archive)
                    # Prepare to scan and collect dependencies of the selected
                    # package archive in a future iteration of the outermost
                    # (while) loop.
                    archives_to_scan.append(package_archive)
                    # Ignore all other versions of the package inside this call
                    # to collect_related_packages_helper().
                    candidate_archives.pop(package_name)
                    # Break out of the loop to avoid scanning other versions of
                    # this package archive; we've made our choice now.
                    break
                elif package_matches is False:
                    # If we're sure we can exclude this version of the package
                    # from future iterations it could be worth it to speed up
                    # the process on big repositories / dependency sets.
                    candidate_archives[package_name].remove(package_archive)
                    # Keep looking for a match in another version.
                elif package_matches is None:
                    # Break out of the loop that scans multiple versions of the
                    # same package because none of the relationship sets collected
                    # so far reference the name of this package (this is intended
                    # as a harmless optimization).
                    break
    spinner.clear()
    # Check for conflicts in the collected set of related package archives.
    conflicts = [a for a in collected_archives if not match_relationships(a, relationship_sets)]
    if conflicts:
        raise CollectedPackagesConflict(conflicts)
    else:
        return collected_archives