def get(self, filename):
    """
    Download a cached distribution archive from the configured Amazon S3
    bucket to the local cache.

    :param filename: The filename of the distribution archive (a string).
    :returns: The pathname of a distribution archive on the local file system
              or ``None``.
    :raises: :py:exc:`.CacheBackendError` when any underlying method fails.
    """
    timer = Timer()
    # Check if the distribution archive is available.
    raw_key = self.get_cache_key(filename)
    logger.info("Checking if distribution archive is available in S3 bucket: %s", raw_key)
    key = self.s3_bucket.get_key(raw_key)
    if key is None:
        logger.debug("Distribution archive is not available in S3 bucket.")
        return None
    # Download the distribution archive to the local binary index.
    logger.info("Downloading distribution archive from S3 bucket ..")
    local_file = os.path.join(self.config.binary_cache, filename)
    makedirs(os.path.dirname(local_file))
    # Stream the download into a process-unique temporary file and move it
    # into place with an atomic rename(), so that other processes reading
    # the local cache concurrently never observe a partially downloaded
    # archive. This applies the same safeguard as LocalCacheBackend.put()
    # (resolving the old TODO about reimplementing those steps unsafely).
    temporary_file = '%s.tmp-%i' % (local_file, os.getpid())
    key.get_contents_to_filename(temporary_file)
    os.rename(temporary_file, local_file)
    logger.debug("Finished downloading distribution archive from S3 bucket in %s.", timer)
    return local_file
def put(self, filename, handle):
    """
    Store a distribution archive in the local cache.

    :param filename: The filename of the distribution archive (a string).
    :param handle: A file-like object that provides access to the
                   distribution archive.
    """
    target_path = os.path.join(self.config.binary_cache, filename)
    logger.debug("Storing distribution archive in local cache: %s", target_path)
    makedirs(os.path.dirname(target_path))
    # Other processes may be reading from the local cache while we write to
    # it, so the archive is never written to its final pathname directly.
    # Instead it is first streamed into a temporary file whose name is made
    # unique with our process ID ...
    staging_path = '%s.tmp-%i' % (target_path, os.getpid())
    logger.debug("Using temporary file to avoid partial reads: %s", staging_path)
    with open(staging_path, 'wb') as staging_handle:
        shutil.copyfileobj(handle, staging_handle)
    # ... and then moved into place with an atomic rename() so that readers
    # see either the complete archive or no archive at all.
    logger.debug("Moving temporary file into place ..")
    os.rename(staging_path, target_path)
    logger.debug("Finished caching distribution archive in local cache.")
def get(self, filename):
    """
    Download a cached distribution archive from the configured Amazon S3
    bucket to the local cache.

    :param filename: The filename of the distribution archive (a string).
    :returns: The pathname of a distribution archive on the local file system
              or ``None``.
    :raises: :py:exc:`.CacheBackendError` when any underlying method fails.
    """
    timer = Timer()
    self.check_prerequisites()
    # Find out whether the requested archive exists in the bucket.
    cache_key = self.get_cache_key(filename)
    logger.info("Checking if distribution archive is available in S3 bucket: %s", cache_key)
    s3_key = self.s3_bucket.get_key(cache_key)
    if s3_key is None:
        logger.debug("Distribution archive is not available in S3 bucket.")
        return None
    # The archive exists, so download it into the local binary index.
    # TODO Shouldn't this use LocalCacheBackend.put() instead of
    #      implementing the same steps manually?!
    logger.info("Downloading distribution archive from S3 bucket ..")
    local_file = os.path.join(self.config.binary_cache, filename)
    makedirs(os.path.dirname(local_file))
    s3_key.get_contents_to_filename(local_file)
    logger.debug("Finished downloading distribution archive from S3 bucket in %s.", timer)
    return local_file
def put(self, filename, handle):
    """
    Store a distribution archive in the local cache.

    :param filename: The filename of the distribution archive (a string).
    :param handle: A file-like object that provides access to the
                   distribution archive.
    """
    file_in_cache = os.path.join(self.config.binary_cache, filename)
    logger.debug("Storing distribution archive in local cache: %s", file_in_cache)
    makedirs(os.path.dirname(file_in_cache))
    # AtomicReplace hands us a scratch pathname and renames it over the final
    # pathname once the block completes, so that other processes using the
    # local cache at the same time never see a half-written archive.
    with AtomicReplace(file_in_cache) as scratch_path:
        with open(scratch_path, 'wb') as scratch_handle:
            shutil.copyfileobj(handle, scratch_handle)
    logger.debug("Finished caching distribution archive in local cache.")
def initialize_directories(self):
    """Automatically create the local source distribution index directory."""
    source_index_directory = self.config.source_index
    makedirs(source_index_directory)
def install_binary_dist(self, members, virtualenv_compatible=True, prefix=None,
                        python=None, track_installed_files=False):
    """
    Install a binary distribution into the given prefix.

    :param members: An iterable of tuples with two values each:

                    1. A :class:`tarfile.TarInfo` object.
                    2. A file-like object.
    :param prefix: The "prefix" under which the requirements should be
                   installed. This will be a pathname like ``/usr``,
                   ``/usr/local`` or the pathname of a virtual environment.
                   Defaults to :attr:`.Config.install_prefix`.
    :param python: The pathname of the Python executable to use in the shebang
                   line of all executable Python scripts inside the binary
                   distribution. Defaults to :attr:`.Config.python_executable`.
    :param virtualenv_compatible: Whether to enable workarounds to make the
                                  resulting filenames compatible with virtual
                                  environments (defaults to :data:`True`).
    :param track_installed_files: If this is :data:`True` (not the default for
                                  this method because of backwards
                                  compatibility) pip-accel will create
                                  ``installed-files.txt`` as required by pip to
                                  properly uninstall packages.

    This method installs a binary distribution created by
    :class:`build_binary_dist()` into the given prefix (a directory like
    ``/usr``, ``/usr/local`` or a virtual environment).
    """
    # TODO This is quite slow for modules like Django. Speed it up! Two choices:
    #  1. Run the external tar program to unpack the archive. This will
    #     slightly complicate the fixing up of hashbangs.
    #  2. Using links? The plan: We can maintain a "seed" environment under
    #     $PIP_ACCEL_CACHE and use symbolic and/or hard links to populate other
    #     places based on the "seed" environment.
    # Normalize every pathname we compare so the membership tests against
    # module_search_path below are reliable.
    module_search_path = set(map(os.path.normpath, sys.path))
    prefix = os.path.normpath(prefix or self.config.install_prefix)
    python = os.path.normpath(python or self.config.python_executable)
    installed_files = []
    for member, from_handle in members:
        # Archive member names are pathnames relative to the installation
        # prefix; they may be rewritten below before being joined to it.
        pathname = member.name
        if virtualenv_compatible:
            # Some binary distributions include C header files (see for example
            # the greenlet package) however the subdirectory of include/ in a
            # virtual environment is a symbolic link to a subdirectory of
            # /usr/include/ so we should never try to install C header files
            # inside the directory pointed to by the symbolic link. Instead we
            # implement the same workaround that pip uses to avoid this
            # problem.
            pathname = re.sub('^include/', 'include/site/', pathname)
        if self.config.on_debian and '/site-packages/' in pathname:
            # On Debian based system wide Python installs the /site-packages/
            # directory is not in Python's module search path while
            # /dist-packages/ is. We try to be compatible with this.
            match = re.match('^(.+?)/site-packages', pathname)
            if match:
                site_packages = os.path.normpath(os.path.join(prefix, match.group(0)))
                dist_packages = os.path.normpath(os.path.join(prefix, match.group(1), 'dist-packages'))
                # Only rewrite the pathname when Python will actually search
                # /dist-packages/ and would ignore /site-packages/.
                if dist_packages in module_search_path and site_packages not in module_search_path:
                    pathname = pathname.replace('/site-packages/', '/dist-packages/')
        pathname = os.path.join(prefix, pathname)
        if track_installed_files:
            # Track the installed file's absolute pathname.
            installed_files.append(pathname)
        directory = os.path.dirname(pathname)
        if not os.path.isdir(directory):
            logger.debug("Creating directory: %s ..", directory)
            makedirs(directory)
        logger.debug("Creating file: %s ..", pathname)
        with open(pathname, 'wb') as to_handle:
            contents = from_handle.read()
            if contents.startswith(b'#!/'):
                # Rewrite the shebang line of executable scripts to point at
                # the configured Python executable.
                contents = self.fix_hashbang(contents, python)
            to_handle.write(contents)
        # Restore the file mode recorded in the archive.
        os.chmod(pathname, member.mode)
    if track_installed_files:
        self.update_installed_files(installed_files)
def initialize_directories(self):
    """Automatically create the directories for the download cache and the source index."""
    makedirs(self.config.download_cache)
    makedirs(self.config.source_index)
def install_binary_dist(self, members, virtualenv_compatible=True, prefix=None,
                        python=None, track_installed_files=False):
    """
    Install a binary distribution into the given prefix.

    :param members: An iterable of tuples with two values each:

                    1. A :class:`tarfile.TarInfo` object.
                    2. A file-like object.
    :param prefix: The "prefix" under which the requirements should be
                   installed. This will be a pathname like ``/usr``,
                   ``/usr/local`` or the pathname of a virtual environment.
                   Defaults to :attr:`.Config.install_prefix`.
    :param python: The pathname of the Python executable to use in the shebang
                   line of all executable Python scripts inside the binary
                   distribution. Defaults to :attr:`.Config.python_executable`.
    :param virtualenv_compatible: Whether to enable workarounds to make the
                                  resulting filenames compatible with virtual
                                  environments (defaults to :data:`True`).
    :param track_installed_files: If this is :data:`True` (not the default for
                                  this method because of backwards
                                  compatibility) pip-accel will create
                                  ``installed-files.txt`` as required by pip to
                                  properly uninstall packages.

    This method installs a binary distribution created by
    :class:`build_binary_dist()` into the given prefix (a directory like
    ``/usr``, ``/usr/local`` or a virtual environment).
    """
    # TODO This is quite slow for modules like Django. Speed it up! Two choices:
    #  1. Run the external tar program to unpack the archive. This will
    #     slightly complicate the fixing up of hashbangs.
    #  2. Using links? The plan: We can maintain a "seed" environment under
    #     $PIP_ACCEL_CACHE and use symbolic and/or hard links to populate other
    #     places based on the "seed" environment.
    # Normalize every pathname we compare so the membership tests against
    # module_search_path below are reliable.
    module_search_path = set(map(os.path.normpath, sys.path))
    prefix = os.path.normpath(prefix or self.config.install_prefix)
    python = os.path.normpath(python or self.config.python_executable)
    installed_files = []
    for member, from_handle in members:
        # Archive member names are pathnames relative to the installation
        # prefix; they may be rewritten below before being joined to it.
        pathname = member.name
        if virtualenv_compatible:
            # Some binary distributions include C header files (see for example
            # the greenlet package) however the subdirectory of include/ in a
            # virtual environment is a symbolic link to a subdirectory of
            # /usr/include/ so we should never try to install C header files
            # inside the directory pointed to by the symbolic link. Instead we
            # implement the same workaround that pip uses to avoid this
            # problem.
            pathname = re.sub('^include/', 'include/site/', pathname)
        if self.config.on_debian and '/site-packages/' in pathname:
            # On Debian based system wide Python installs the /site-packages/
            # directory is not in Python's module search path while
            # /dist-packages/ is. We try to be compatible with this.
            match = re.match('^(.+?)/site-packages', pathname)
            if match:
                site_packages = os.path.normpath(
                    os.path.join(prefix, match.group(0)))
                dist_packages = os.path.normpath(
                    os.path.join(prefix, match.group(1), 'dist-packages'))
                # Only rewrite the pathname when Python will actually search
                # /dist-packages/ and would ignore /site-packages/.
                if dist_packages in module_search_path and site_packages not in module_search_path:
                    pathname = pathname.replace('/site-packages/', '/dist-packages/')
        pathname = os.path.join(prefix, pathname)
        if track_installed_files:
            # Track the installed file's absolute pathname.
            installed_files.append(pathname)
        directory = os.path.dirname(pathname)
        if not os.path.isdir(directory):
            logger.debug("Creating directory: %s ..", directory)
            makedirs(directory)
        logger.debug("Creating file: %s ..", pathname)
        with open(pathname, 'wb') as to_handle:
            contents = from_handle.read()
            if contents.startswith(b'#!/'):
                # Rewrite the shebang line of executable scripts to point at
                # the configured Python executable.
                contents = self.fix_hashbang(contents, python)
            to_handle.write(contents)
        # Restore the file mode recorded in the archive.
        os.chmod(pathname, member.mode)
    if track_installed_files:
        self.update_installed_files(installed_files)
def install_binary_dist(self, members, virtualenv_compatible=True, prefix=None, python=None):
    """
    Install a binary distribution created by
    :py:class:`build_binary_dist()` into the given prefix (a directory like
    ``/usr``, ``/usr/local`` or a virtual environment).

    :param members: An iterable of tuples with two values each:

                    1. A :py:class:`tarfile.TarInfo` object.
                    2. A file-like object.
    :param prefix: The "prefix" under which the requirements should be
                   installed. This will be a pathname like ``/usr``,
                   ``/usr/local`` or the pathname of a virtual environment.
                   Defaults to :py:attr:`.Config.install_prefix`.
    :param python: The pathname of the Python executable to use in the shebang
                   line of all executable Python scripts inside the binary
                   distribution. Defaults to :py:attr:`.Config.python_executable`.
    :param virtualenv_compatible: Whether to enable workarounds to make the
                                  resulting filenames compatible with virtual
                                  environments (defaults to ``True``).
    :raises: :py:exc:`~exceptions.ValueError` when the Python executable is not
             located inside the installation prefix.
    """
    # TODO This is quite slow for modules like Django. Speed it up! Two choices:
    #  1. Run the external tar program to unpack the archive. This will
    #     slightly complicate the fixing up of hashbangs.
    #  2. Using links? The plan: We can maintain a "seed" environment under
    #     $PIP_ACCEL_CACHE and use symbolic and/or hard links to populate other
    #     places based on the "seed" environment.
    prefix = os.path.normpath(prefix or self.config.install_prefix)
    python = os.path.normpath(python or self.config.python_executable)
    # Make sure the installation prefix and Python executable match.
    if os.path.commonprefix([prefix, python]) != prefix:
        raise ValueError(compact("""
            The configured Python executable ({executable}) is not located
            inside the installation prefix ({prefix}). I don't think this
            can work, so please make sure that if you override one
            configuration option you also set the other one!
        """, executable=python, prefix=prefix))
    # Normalize every pathname we compare so the membership tests against
    # module_search_path below are reliable.
    module_search_path = set(map(os.path.normpath, sys.path))
    for member, from_handle in members:
        # Archive member names are pathnames relative to the installation
        # prefix; they may be rewritten below before being joined to it.
        pathname = member.name
        if virtualenv_compatible:
            # Some binary distributions include C header files (see for example
            # the greenlet package) however the subdirectory of include/ in a
            # virtual environment is a symbolic link to a subdirectory of
            # /usr/include/ so we should never try to install C header files
            # inside the directory pointed to by the symbolic link. Instead we
            # implement the same workaround that pip uses to avoid this
            # problem.
            pathname = re.sub('^include/', 'include/site/', pathname)
        if self.config.on_debian and '/site-packages/' in pathname:
            # On Debian based system wide Python installs the /site-packages/
            # directory is not in Python's module search path while
            # /dist-packages/ is. We try to be compatible with this.
            match = re.match('^(.+?)/site-packages', pathname)
            if match:
                site_packages = os.path.normpath(os.path.join(prefix, match.group(0)))
                dist_packages = os.path.normpath(os.path.join(prefix, match.group(1), 'dist-packages'))
                # Only rewrite the pathname when Python will actually search
                # /dist-packages/ and would ignore /site-packages/.
                if dist_packages in module_search_path and site_packages not in module_search_path:
                    pathname = pathname.replace('/site-packages/', '/dist-packages/')
        pathname = os.path.join(prefix, pathname)
        directory = os.path.dirname(pathname)
        if not os.path.isdir(directory):
            logger.debug("Creating directory: %s ..", directory)
            makedirs(directory)
        logger.debug("Creating file: %s ..", pathname)
        with open(pathname, 'wb') as to_handle:
            contents = from_handle.read()
            if contents.startswith(b'#!/'):
                # Rewrite the shebang line of executable scripts to point at
                # the configured Python executable.
                contents = self.fix_hashbang(contents, python)
            to_handle.write(contents)
        # Restore the file mode recorded in the archive.
        os.chmod(pathname, member.mode)
def build_binary_dist_helper(self, requirement):
    """
    Convert a single, unpacked source distribution to a binary distribution.

    Raises an exception if it fails to create the binary distribution
    (probably because of missing binary dependencies like system libraries).

    :param requirement: A :py:class:`.Requirement` object.
    :returns: The pathname of the resulting binary distribution (a string).
    :raises: :py:exc:`.BuildFailed` when the build reports an error.
    :raises: :py:exc:`.NoBuildOutput` when the build does not produce the
             expected binary distribution archive.
    """
    build_timer = Timer()
    # Make sure the source distribution contains a setup script.
    setup_script = os.path.join(requirement.source_directory, 'setup.py')
    if not os.path.isfile(setup_script):
        msg = "Directory %s (%s %s) doesn't contain a source distribution!"
        raise InvalidSourceDistribution(msg % (requirement.source_directory, requirement.name, requirement.version))
    # Let the user know what's going on.
    build_text = "Building %s (%s) binary distribution" % (requirement.name, requirement.version)
    logger.info("%s ..", build_text)
    # Cleanup previously generated distributions.
    dist_directory = os.path.join(requirement.source_directory, 'dist')
    if os.path.isdir(dist_directory):
        logger.debug("Cleaning up previously generated distributions in %s ..", dist_directory)
        shutil.rmtree(dist_directory)
    makedirs(dist_directory)
    # Create a temporary directory for pip installing into, and set up the
    # install_lib directory structure inside it. We do this so that we can
    # pip install into this as our target.
    temporary_dir = tempfile.mkdtemp()
    distutils_inst = install(Distribution())
    distutils_inst.prefix = ''  # This will be changed if we're in a virtualenv.
    distutils_inst.finalize_options()
    pip_target = os.path.normpath(temporary_dir + distutils_inst.install_lib)
    # Compose the command line needed to build the binary distribution.
    command_line = ' '.join(pipes.quote(t) for t in ['pip', 'install', '.', '--target', pip_target])
    logger.debug("Executing external command: %s", command_line)
    # Redirect all output of the build to a temporary file.
    fd, temporary_file = tempfile.mkstemp()
    command_line = '%s > "%s" 2>&1' % (command_line, temporary_file)
    try:
        # Start the build.
        build = subprocess.Popen(['sh', '-c', command_line], cwd=requirement.source_directory)
        # Wait for the build to finish and provide feedback to the user in the mean time.
        spinner = Spinner(label=build_text, timer=build_timer)
        while build.poll() is None:
            spinner.step()
            # Don't tax the CPU too much.
            time.sleep(0.2)
        spinner.clear()
        # At this point, we may have a number of dependencies in the directory
        # we want to tar up that should not be part of the package
        # distribution. For instance, s3 will also wrap up the concurrent,
        # futures, and requests packages. We fix this by reading
        # {name}-{version}-{py-version}.egg-info/installed-files.txt and
        # removing any files or directories that are not in it.
        # 1. Find the appropriate .egg-info/ directory.
        egg_info_dir = None
        egg_info_start = '-'.join([requirement.name, requirement.version])
        for egg_info_root, dirs, _ in os.walk(temporary_dir):
            for d in dirs:
                if d.startswith(egg_info_start) and d.endswith('.egg-info'):
                    egg_info_dir = d
                    break
            if egg_info_dir is not None:
                break
        # 2. If we have a .egg-info/, try to read the installed-files.txt contents.
        inst_files = set()
        if egg_info_dir is not None:
            egg_info_path = os.path.join(egg_info_root, egg_info_dir)
            inst_files_path = os.path.join(egg_info_path, 'installed-files.txt')
            try:
                with open(inst_files_path) as f:
                    for line in f:
                        abs_path = os.path.abspath(os.path.join(egg_info_path, line.strip()))
                        inst_files.add(abs_path)
                        inst_files.add(os.path.dirname(abs_path))
            except IOError as ioe:
                # Bug fix: this previously used the Python 2 only syntax
                # `except IOError, ioe' and called `loger.warn' (a NameError
                # at runtime due to the typo; warn() is also deprecated).
                logger.warning('Unable to open %s: %s', inst_files_path, ioe)
        # 3. If we were able to get a set of files and directories that belong
        #    in the distribution, then we can delete everything else before
        #    archiving it.
        if inst_files:
            dirs, files = next(os.walk(egg_info_root))[1:]
            for d in dirs:
                d = os.path.abspath(os.path.join(egg_info_root, d))
                if d not in inst_files:
                    logger.info('Removing %s (not part of the package)', d)
                    shutil.rmtree(d)
            for f in files:
                f = os.path.abspath(os.path.join(egg_info_root, f))
                if f not in inst_files:
                    logger.info('Removing %s (not part of the package)', f)
                    os.unlink(f)
        # Tar up the contents of temporary_dir into the correct file name and put it in the dist dir.
        tarball_path = os.path.join(temporary_dir, requirement.name)
        path = archive_util.make_archive(tarball_path, 'gztar', root_dir=temporary_dir)
        shutil.copy(path, dist_directory)
        # Make sure the build succeeded and produced a binary distribution archive.
        try:
            # If the build reported an error we'll try to provide the user with
            # some hints about what went wrong.
            if build.returncode != 0:
                raise BuildFailed("Failed to build {name} ({version}) binary distribution!",
                                  name=requirement.name, version=requirement.version)
            # Check if the build created the `dist' directory (the os.listdir()
            # call below will raise an exception if we don't check for this).
            if not os.path.isdir(dist_directory):
                raise NoBuildOutput("Build of {name} ({version}) did not produce a binary distribution archive!",
                                    name=requirement.name, version=requirement.version)
            # Check if we can find the binary distribution archive.
            filenames = os.listdir(dist_directory)
            if len(filenames) != 1:
                raise NoBuildOutput("Build of {name} ({version}) produced more than one distribution archive! (matches: {filenames})",
                                    name=requirement.name, version=requirement.version,
                                    filenames=concatenate(sorted(filenames)))
        except Exception as e:
            # Decorate the exception with the output of the failed build.
            with open(temporary_file) as handle:
                build_output = handle.read()
            enhanced_message = compact("""
                {message}

                Please check the build output because it will probably provide a
                hint about what went wrong.

                Build output:

                {output}
            """, message=e.args[0], output=build_output.strip())
            e.args = (enhanced_message,)
            raise
    finally:
        # Bug fix: the file descriptor returned by mkstemp() was never
        # closed and the temporary build log and staging directory were
        # never deleted, leaking one of each per build.
        os.close(fd)
        os.unlink(temporary_file)
        shutil.rmtree(temporary_dir, ignore_errors=True)
    logger.info("Finished building %s (%s) in %s.", requirement.name, requirement.version, build_timer)
    return os.path.join(dist_directory, filenames[0])
def initialize_directories(self):
    """Automatically create local directories required by pip-accel."""
    for required_directory in (self.config.source_index, self.config.eggs_cache):
        makedirs(required_directory)