def cache_distribution(cls, zf, source, target_dir):
    """Possibly cache a wheel from within a zipfile into `target_dir`.

    Given a zipfile handle and a source path prefix corresponding to a wheel install embedded
    within that zip, maybe extract the wheel install into the target cache and then return a
    distribution from the cache.

    :param zf: An open zip file (a zipped pex).
    :type zf: :class:`zipfile.ZipFile`
    :param str source: The path prefix of a wheel install embedded in the zip file.
    :param str target_dir: The directory to cache the distribution in if not already cached.
    :returns: The cached distribution.
    :rtype: :class:`pex.third_party.pkg_resources.Distribution`
    """
    with atomic_directory(target_dir, source=source) as target_dir_tmp:
        if target_dir_tmp is None:
            TRACER.log('Using cached {}'.format(target_dir))
        else:
            with TRACER.timed('Caching {}:{} in {}'.format(zf.filename, source, target_dir)):
                for name in zf.namelist():
                    if name.startswith(source) and not name.endswith('/'):
                        zf.extract(name, target_dir_tmp)

    dist = DistributionHelper.distribution_from_path(target_dir)
    assert dist is not None, 'Failed to cache distribution: {}'.format(source)
    return dist
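# Illustrative, stdlib-only sketch of the same "extract a zip subtree into a cache exactly
# once" idea (not pex's atomic_directory): extract into a scratch directory, then publish it
# with an atomic rename. The helper name and layout are assumptions for the example.
import os
import shutil
import tempfile
import zipfile

def extract_subtree_once(zip_path, prefix, cache_dir):
    if os.path.isdir(cache_dir):
        return cache_dir                           # Another run already populated the cache.
    work_dir = tempfile.mkdtemp(dir=os.path.dirname(cache_dir) or ".")
    try:
        with zipfile.ZipFile(zip_path) as zf:
            members = [n for n in zf.namelist() if n.startswith(prefix) and not n.endswith("/")]
            zf.extractall(work_dir, members)
        try:
            os.rename(work_dir, cache_dir)         # Atomic publish on POSIX within one filesystem.
        except OSError:
            shutil.rmtree(work_dir, ignore_errors=True)  # Lost the race; the cache already exists.
    except Exception:
        shutil.rmtree(work_dir, ignore_errors=True)
        raise
    return cache_dir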
def add_distribution(self, dist, dist_name=None): """Add a :class:`pkg_resources.Distribution` from its handle. :param dist: The distribution to add to this environment. :keyword dist_name: (optional) The name of the distribution e.g. 'Flask-0.10.0'. By default this will be inferred from the distribution itself should it be formatted in a standard way. :type dist: :class:`pkg_resources.Distribution` """ if dist.location in self._distributions: TRACER.log("Skipping adding {} - already added from {}".format( dist, dist.location), V=9) return self._ensure_unfrozen("Adding a distribution") dist_name = dist_name or os.path.basename(dist.location) self._distributions[dist.location] = dist if os.path.isdir(dist.location): dist_hash = self._add_dist_dir(dist.location, dist_name) elif dist.location.endswith(".whl"): dist_hash = self._add_dist_wheel_file(dist.location, dist_name) else: raise self.InvalidDistribution( "Unsupported distribution type: {}, pex can only accept dist " "dirs and wheels.".format(dist)) # add dependency key so that it can rapidly be retrieved from cache self._pex_info.add_distribution(dist_name, dist_hash)
def demote_bootstrap(cls): TRACER.log('Bootstrap complete, performing final sys.path modifications...') should_log = {level: TRACER.should_log(V=level) for level in range(1, 10)} def log(msg, V=1): if should_log.get(V, False): print('pex: {}'.format(msg), file=sys.stderr) # Remove the third party resources pex uses and demote pex bootstrap code to the end of # sys.path for the duration of the run to allow conflicting versions supplied by user # dependencies to win during the course of the execution of user code. unregister_finders() third_party.uninstall() bootstrap = Bootstrap.locate() log('Demoting code from %s' % bootstrap, V=2) for module in bootstrap.demote(): log('un-imported {}'.format(module), V=9) import pex log('Re-imported pex from {}'.format(pex.__path__), V=3) log('PYTHONPATH contains:') for element in sys.path: log(' %c %s' % (' ' if os.path.exists(element) else '*', element)) log(' * - paths that do not exist or will be imported via zipimport')
def create( cls, venv_dir, # type: str interpreter=None, # type: Optional[PythonInterpreter] force=False, # type: bool ): # type: (...) -> Virtualenv venv_dir = os.path.abspath(venv_dir) safe_mkdir(venv_dir, clean=force) interpreter = interpreter or PythonInterpreter.get() if interpreter.is_venv: base_interpreter = interpreter.resolve_base_interpreter() TRACER.log( "Ignoring enclosing venv {} and using its base interpreter {} to create venv at {}" " instead.".format(interpreter.prefix, base_interpreter.binary, venv_dir), V=3, ) interpreter = base_interpreter if interpreter.version[0] >= 3 and not interpreter.identity.interpreter == "PyPy": # N.B.: PyPy3 comes equipped with a venv module but it does not seem to work. interpreter.execute(args=["-m", "venv", "--without-pip", venv_dir]) else: virtualenv_py = resource_string(__name__, "virtualenv_16.7.10_py") with named_temporary_file(mode="wb") as fp: fp.write(virtualenv_py) fp.close() interpreter.execute( args=[fp.name, "--no-pip", "--no-setuptools", "--no-wheel", venv_dir], ) return cls(venv_dir)
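# Illustrative sketch of the CPython 3 branch above as a standalone, stdlib-only snippet:
# create a venv without pip via the `venv` module's documented `--without-pip` flag. The
# helper name and target path are assumptions for the example.
import subprocess
import sys

def create_bare_venv(venv_dir):
    subprocess.check_call([sys.executable, "-m", "venv", "--without-pip", venv_dir])

# create_bare_venv("/tmp/example-venv")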
def set_script(self, script): """Set the entry point of this PEX environment based upon a distribution script. :param script: The script name as defined either by a console script or ordinary script within the setup.py of one of the distributions added to the PEX. :raises: :class:`PEXBuilder.InvalidExecutableSpecification` if the script is not found in any distribution added to the PEX. """ # check if 'script' is a console_script dist, entry_point = get_entry_point_from_console_script( script, self._distributions.values()) if entry_point: self.set_entry_point(entry_point) TRACER.log("Set entrypoint to console_script %r in %r" % (entry_point, dist)) return # check if 'script' is an ordinary script dist_script = get_script_from_distributions( script, self._distributions.values()) if dist_script: if self._pex_info.entry_point: raise self.InvalidExecutableSpecification( "Cannot set both entry point and script of PEX!") self._pex_info.script = script TRACER.log("Set entrypoint to script %r in %r" % (script, dist_script.dist)) return raise self.InvalidExecutableSpecification( "Could not find script %r in any distribution %s within PEX!" % (script, ", ".join(str(d) for d in self._distributions.values())))
def _execute(self): force_interpreter = self._vars.PEX_INTERPRETER self.clean_environment() if force_interpreter: TRACER.log('PEX_INTERPRETER specified, dropping into interpreter') return self.execute_interpreter() if self._pex_info_overrides.script and self._pex_info_overrides.entry_point: die('Cannot specify both script and entry_point for a PEX!') if self._pex_info.script and self._pex_info.entry_point: die('Cannot specify both script and entry_point for a PEX!') if self._pex_info_overrides.script: return self.execute_script(self._pex_info_overrides.script) elif self._pex_info_overrides.entry_point: return self.execute_entry(self._pex_info_overrides.entry_point) elif self._pex_info.script: return self.execute_script(self._pex_info.script) elif self._pex_info.entry_point: return self.execute_entry(self._pex_info.entry_point) else: TRACER.log('No entry point specified, dropping into interpreter') return self.execute_interpreter()
def patch_sys(self): # type: () -> None """Patch sys with all site scrubbed.""" inherit_path = self._vars.PEX_INHERIT_PATH if inherit_path == InheritPath.FALSE: inherit_path = self._pex_info.inherit_path def patch_dict(old_value, new_value): # type: (Dict[_K, _V], Mapping[_K, _V]) -> None old_value.clear() old_value.update(new_value) def patch_all(path, path_importer_cache, modules): # type: (List[str], Mapping[str, Any], Mapping[str, ModuleType]) -> None sys.path[:] = path patch_dict(sys.path_importer_cache, path_importer_cache) patch_dict(sys.modules, modules) new_sys_path, new_sys_path_importer_cache, new_sys_modules = self.minimum_sys(inherit_path) if self._vars.PEX_EXTRA_SYS_PATH: TRACER.log("Adding %s to sys.path" % self._vars.PEX_EXTRA_SYS_PATH) new_sys_path.extend(self._vars.PEX_EXTRA_SYS_PATH.split(":")) TRACER.log("New sys.path: %s" % new_sys_path) patch_all(new_sys_path, new_sys_path_importer_cache, new_sys_modules)
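# Illustrative sketch of the in-place patching helpers above: mutate the existing list/dict
# objects rather than rebinding names, so any code holding a reference to sys.path or
# sys.modules observes the change. Demonstrated on plain objects; the names are assumptions.
def patch_list(old, new):
    old[:] = new            # Slice assignment keeps the original list object.

def patch_dict(old, new):
    old.clear()
    old.update(new)         # Same dict object, new contents.

# path = ["/a", "/b"]; alias = path
# patch_list(path, ["/c"])
# alias  -> ["/c"]          # The alias sees the update because the object itself was mutated.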
def run(self, args=(), with_chroot=False, blocking=True, setsid=False, env=None, **kwargs): """Run the PythonEnvironment in an interpreter in a subprocess. :keyword args: Additional arguments to be passed to the application being invoked by the environment. :keyword with_chroot: Run with cwd set to the environment's working directory. :keyword blocking: If true, return the return code of the subprocess. If false, return the Popen object of the invoked subprocess. :keyword setsid: If true, run the PEX in a separate operating system session. :keyword env: An optional environment dict to use as the PEX subprocess environment. If none is passed, the ambient environment is inherited. Remaining keyword arguments are passed directly to subprocess.Popen. """ if env is not None: # If explicit env vars are passed, we don't want clean any of these. env = env.copy() else: env = os.environ.copy() self._clean_environment(env=env) cmdline = self.cmdline(args) TRACER.log("PEX.run invoking %s" % " ".join(cmdline)) process = Executor.open_process( cmdline, cwd=self._pex if with_chroot else os.getcwd(), preexec_fn=os.setsid if setsid else None, stdin=kwargs.pop("stdin", None), stdout=kwargs.pop("stdout", None), stderr=kwargs.pop("stderr", None), env=env, **kwargs ) return process.wait() if blocking else process
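# Illustrative, stdlib-only sketch of the subprocess pattern used above: optional new session
# via preexec_fn=os.setsid, optional working directory, and an explicitly copied environment.
# The helper name and command line are assumptions for the example.
import os
import subprocess

def run_cmd(cmdline, cwd=None, setsid=False, blocking=True, env=None):
    process = subprocess.Popen(
        cmdline,
        cwd=cwd or os.getcwd(),
        preexec_fn=os.setsid if setsid else None,  # POSIX-only; start_new_session=True also works.
        env=(env or os.environ).copy(),
    )
    return process.wait() if blocking else process

# run_cmd(["echo", "hello"])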
def _execute(self): # type: () -> Any force_interpreter = self._vars.PEX_INTERPRETER self._clean_environment(strip_pex_env=self._pex_info.strip_pex_env) if force_interpreter: TRACER.log("PEX_INTERPRETER specified, dropping into interpreter") return self.execute_interpreter() if self._pex_info_overrides.script and self._pex_info_overrides.entry_point: return "Cannot specify both script and entry_point for a PEX!" if self._pex_info.script and self._pex_info.entry_point: return "Cannot specify both script and entry_point for a PEX!" if self._pex_info_overrides.script: return self.execute_script(self._pex_info_overrides.script) elif self._pex_info_overrides.entry_point: return self.execute_entry(self._pex_info_overrides.entry_point) elif self._pex_info.script: return self.execute_script(self._pex_info.script) elif self._pex_info.entry_point: return self.execute_entry(self._pex_info.entry_point) else: TRACER.log("No entry point specified, dropping into interpreter") return self.execute_interpreter()
def cache_distribution(cls, zf, source, target_dir): # type: (ZipFile, str, str) -> Distribution """Possibly cache a wheel from within a zipfile into `target_dir`. Given a zipfile handle and a source path prefix corresponding to a wheel install embedded within that zip, maybe extract the wheel install into the target cache and then return a distribution from the cache. :param zf: An open zip file (a zipped pex). :param source: The path prefix of a wheel install embedded in the zip file. :param target_dir: The directory to cache the distribution in if not already cached. :returns: The cached distribution. """ with atomic_directory(target_dir, source=source, exclusive=True) as target_dir_tmp: if target_dir_tmp is None: TRACER.log("Using cached {}".format(target_dir)) else: with TRACER.timed("Caching {}:{} in {}".format( zf.filename, source, target_dir)): for name in zf.namelist(): if name.startswith(source) and not name.endswith("/"): zf.extract(name, target_dir_tmp) dist = DistributionHelper.distribution_from_path(target_dir) assert dist is not None, "Failed to cache distribution: {} ".format( source) return dist
def __init__( self, pex, # type: str pex_info=None, # type: Optional[PexInfo] interpreter=None, # type: Optional[PythonInterpreter] ): # type: (...) -> None self._pex = pex self._pex_info = pex_info or PexInfo.from_pex(pex) self._internal_cache = os.path.join(self._pex, self._pex_info.internal_cache) self._activated = False self._working_set = None self._interpreter = interpreter or PythonInterpreter.get() self._inherit_path = self._pex_info.inherit_path self._supported_tags = frozenset(self._interpreter.identity.supported_tags) self._target_interpreter_env = self._interpreter.identity.env_markers # For the bug this works around, see: https://bitbucket.org/pypy/pypy/issues/1686 # NB: This must be installed early before the underlying pex is loaded in any way. if self._interpreter.identity.python_tag.startswith("pp") and zipfile.is_zipfile(self._pex): self._install_pypy_zipimporter_workaround(self._pex) super(PEXEnvironment, self).__init__( search_path=[] if self._pex_info.inherit_path == InheritPath.FALSE else sys.path, platform=self._interpreter.identity.platform_tag, ) TRACER.log( "E: tags for %r x %r -> %s" % (self.platform, self._interpreter, self._supported_tags), V=9, )
def _execute(self):
    force_interpreter = self._vars.PEX_INTERPRETER

    # N.B.: This is set in `__main__.py` of the executed PEX by `PEXBuilder` when we've been
    # executed from within a PEX zip file in `--unzip` mode. We replace `sys.argv[0]` to avoid
    # confusion and so the user code we hand off to can provide useful messages and fully valid
    # re-execs that are always re-directed through the PEX file.
    sys.argv[0] = os.environ.pop('__PEX_EXE__', sys.argv[0])

    self._clean_environment(strip_pex_env=self._pex_info.strip_pex_env)

    if force_interpreter:
        TRACER.log('PEX_INTERPRETER specified, dropping into interpreter')
        return self.execute_interpreter()

    if self._pex_info_overrides.script and self._pex_info_overrides.entry_point:
        die('Cannot specify both script and entry_point for a PEX!')

    if self._pex_info.script and self._pex_info.entry_point:
        die('Cannot specify both script and entry_point for a PEX!')

    if self._pex_info_overrides.script:
        return self.execute_script(self._pex_info_overrides.script)
    elif self._pex_info_overrides.entry_point:
        return self.execute_entry(self._pex_info_overrides.entry_point)
    elif self._pex_info.script:
        return self.execute_script(self._pex_info.script)
    elif self._pex_info.entry_point:
        return self.execute_entry(self._pex_info.entry_point)
    else:
        TRACER.log('No entry point specified, dropping into interpreter')
        return self.execute_interpreter()
def _wrap_coverage(self, runner, *args): if not self._vars.PEX_COVERAGE and self._vars.PEX_COVERAGE_FILENAME is None: return runner(*args) try: import coverage except ImportError: die('Could not bootstrap coverage module, aborting.') pex_coverage_filename = self._vars.PEX_COVERAGE_FILENAME if pex_coverage_filename is not None: cov = coverage.coverage(data_file=pex_coverage_filename) else: cov = coverage.coverage(data_suffix=True) TRACER.log('Starting coverage.') cov.start() try: return runner(*args) finally: TRACER.log('Stopping coverage') cov.stop() # TODO(wickman) Post-process coverage to elide $PEX_ROOT and make # the report more useful/less noisy. #89 if pex_coverage_filename: cov.save() else: cov.report(show_missing=False, ignore_errors=True, file=sys.stdout)
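# Illustrative sketch of wrapping a callable with coverage measurement, mirroring the pattern
# above. It requires the third-party `coverage` package to be installed; the runner passed in
# is a placeholder for the example.
import sys
import coverage

def run_with_coverage(runner, *args):
    cov = coverage.Coverage(data_suffix=True)
    cov.start()
    try:
        return runner(*args)
    finally:
        cov.stop()
        cov.report(show_missing=False, ignore_errors=True, file=sys.stdout)

# run_with_coverage(sum, [1, 2, 3])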
def demote_bootstrap(cls): TRACER.log( 'Bootstrap complete, performing final sys.path modifications...') should_log = { level: TRACER.should_log(V=level) for level in range(1, 10) } def log(msg, V=1): if should_log.get(V, False): print('pex: {}'.format(msg), file=sys.stderr) # Remove the third party resources pex uses and demote pex bootstrap code to the end of # sys.path for the duration of the run to allow conflicting versions supplied by user # dependencies to win during the course of the execution of user code. third_party.uninstall() bootstrap = Bootstrap.locate() log('Demoting code from %s' % bootstrap, V=2) for module in bootstrap.demote(): log('un-imported {}'.format(module), V=9) import pex log('Re-imported pex from {}'.format(pex.__path__), V=3) log('PYTHONPATH contains:') for element in sys.path: log(' %c %s' % (' ' if os.path.exists(element) else '*', element)) log(' * - paths that do not exist or will be imported via zipimport')
def __init__(self, pex, pex_info, interpreter=None, **kw): self._internal_cache = os.path.join(pex, pex_info.internal_cache) self._pex = pex self._pex_info = pex_info self._activated = False self._working_set = None self._interpreter = interpreter or PythonInterpreter.get() self._inherit_path = pex_info.inherit_path self._supported_tags = [] # For the bug this works around, see: https://bitbucket.org/pypy/pypy/issues/1686 # NB: This must be installed early before the underlying pex is loaded in any way. if self._interpreter.identity.abbr_impl == 'pp' and zipfile.is_zipfile(self._pex): self._install_pypy_zipimporter_workaround(self._pex) platform = Platform.current() platform_name = platform.platform super(PEXEnvironment, self).__init__( search_path=[] if pex_info.inherit_path == 'false' else sys.path, # NB: Our pkg_resources.Environment base-class wants the platform name string and not the # pex.platform.Platform object. platform=platform_name, **kw ) self._target_interpreter_env = self._interpreter.identity.pkg_resources_env(platform_name) self._supported_tags.extend(platform.supported_tags(self._interpreter)) TRACER.log( 'E: tags for %r x %r -> %s' % (self.platform, self._interpreter, self._supported_tags), V=9 )
def __init__(self, *args, **kw):
    TRACER.log('Warning: using a UrllibContext, which is known to be flaky.')
    TRACER.log('Please build pex with the requests module for more reliable downloads.')
    super(UrllibContext, self).__init__(*args, **kw)
def __init__(self, pex, pex_info, interpreter=None, **kw): self._internal_cache = os.path.join(pex, pex_info.internal_cache) self._pex = pex self._pex_info = pex_info self._activated = False self._working_set = None self._interpreter = interpreter or PythonInterpreter.get() self._inherit_path = pex_info.inherit_path self._supported_tags = frozenset( self._interpreter.identity.supported_tags) self._target_interpreter_env = self._interpreter.identity.env_markers # For the bug this works around, see: https://bitbucket.org/pypy/pypy/issues/1686 # NB: This must be installed early before the underlying pex is loaded in any way. if self._interpreter.identity.python_tag.startswith( 'pp') and zipfile.is_zipfile(self._pex): self._install_pypy_zipimporter_workaround(self._pex) super(PEXEnvironment, self).__init__( search_path=[] if pex_info.inherit_path == 'false' else sys.path, platform=self._interpreter.identity.platform_tag, **kw) TRACER.log('E: tags for %r x %r -> %s' % (self.platform, self._interpreter, self._supported_tags), V=9)
def _extras_paths(cls): # type: () -> Iterator[str] standard_lib = sysconfig.get_python_lib(standard_lib=True) try: makefile = sysconfig.parse_makefile( # type: ignore[attr-defined] sysconfig.get_makefile_filename() ) except (AttributeError, IOError): # This is not available by default in PyPy's distutils.sysconfig or it simply is # no longer available on the system (IOError ENOENT) makefile = {} extras_paths = filter( None, makefile.get("EXTRASPATH", "").split(":") ) # type: Iterable[str] for path in extras_paths: yield os.path.join(standard_lib, path) # Handle .pth injected paths as extras. sitedirs = cls._get_site_packages() for pth_path in cls._scan_pth_files(sitedirs): TRACER.log("Found .pth file: %s" % pth_path, V=3) for extras_path in iter_pth_paths(pth_path): yield extras_path
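# Illustrative sketch of locating the .pth files that the extras scanning above works from,
# using only the standard library. `site.getsitepackages()` is unavailable inside some
# virtualenvs, hence the guarded fallback; the helper name is an assumption.
import glob
import os
import site

def list_pth_files():
    sitedirs = site.getsitepackages() if hasattr(site, "getsitepackages") else [site.USER_SITE]
    for sitedir in sitedirs:
        for pth in glob.glob(os.path.join(sitedir, "*.pth")):
            yield pth

# list(list_pth_files())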
def set_script(self, script): """Set the entry point of this PEX environment based upon a distribution script. :param script: The script name as defined either by a console script or ordinary script within the setup.py of one of the distributions added to the PEX. :raises: :class:`PEXBuilder.InvalidExecutableSpecification` if the script is not found in any distribution added to the PEX. """ # check if 'script' is a console_script dist, entry_point = get_entry_point_from_console_script(script, self._distributions) if entry_point: self.set_entry_point(entry_point) TRACER.log('Set entrypoint to console_script %r in %r' % (entry_point, dist)) return # check if 'script' is an ordinary script dist, _, _ = get_script_from_distributions(script, self._distributions) if dist: if self._pex_info.entry_point: raise self.InvalidExecutableSpecification('Cannot set both entry point and script of PEX!') self._pex_info.script = script TRACER.log('Set entrypoint to script %r in %r' % (script, dist)) return raise self.InvalidExecutableSpecification( 'Could not find script %r in any distribution %s within PEX!' % ( script, ', '.join(str(d) for d in self._distributions)))
def from_id_string(cls, id_string): TRACER.log('creating PythonIdentity from id string: %s' % id_string, V=3) values = str(id_string).split() if len(values) != 6: raise cls.InvalidError("Invalid id string: %s" % id_string) return cls(*values)
def _create_isolated_cmd( cls, binary, # type: str args=None, # type: Optional[Iterable[str]] pythonpath=None, # type: Optional[Iterable[str]] env=None, # type: Optional[Mapping[str, str]] ): # type: (...) -> Tuple[Iterable[str], Mapping[str, str]] cmd = [binary] # Don't add the user site directory to `sys.path`. # # Additionally, it would be nice to pass `-S` to disable adding site-packages but unfortunately # some python distributions include portions of the standard library there. cmd.append("-s") env = cls._sanitized_environment(env=env) pythonpath = list(pythonpath or ()) if pythonpath: env["PYTHONPATH"] = os.pathsep.join(pythonpath) else: # Turn off reading of PYTHON* environment variables. cmd.append("-E") if args: cmd.extend(args) rendered_command = " ".join(cmd) if pythonpath: rendered_command = "PYTHONPATH={} {}".format( env["PYTHONPATH"], rendered_command) TRACER.log("Executing: {}".format(rendered_command), V=3) return cmd, env
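# Illustrative, stdlib-only sketch of running an interpreter isolated from the user site
# directory and PYTHON* environment variables, as the helper above arranges. The helper name
# and the child code string are assumptions for the example.
import os
import subprocess
import sys

def run_isolated(code, pythonpath=None):
    cmd = [sys.executable, "-s"]   # -s: skip the user site directory.
    env = os.environ.copy()
    if pythonpath:
        env["PYTHONPATH"] = os.pathsep.join(pythonpath)
    else:
        cmd.append("-E")           # -E: ignore PYTHON* environment variables.
    cmd.extend(["-c", code])
    return subprocess.check_output(cmd, env=env)

# run_isolated("import sys; print(len(sys.path))")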
def matched_interpreters_iter(interpreters_iter, constraints): """Given some filters, yield any interpreter that matches at least one of them. :param interpreters_iter: A `PythonInterpreter` iterable for filtering. :param constraints: A sequence of strings that constrain the interpreter compatibility for this pex. Each string uses the Requirement-style format, e.g. 'CPython>=3' or '>=2.7,<3' for requirements agnostic to interpreter class. Multiple requirement strings may be combined into a list to OR the constraints, such as ['CPython>=2.7,<3', 'CPython>=3.4']. :return: returns a generator that yields compatible interpreters :raises: :class:`UnsatisfiableInterpreterConstraintsError` if constraints were given and could not be satisfied. The exception will only be raised when the returned generator is fully consumed. """ candidates = [] found = False for interpreter in interpreters_iter: if any(interpreter.identity.matches(filt) for filt in constraints): TRACER.log("Constraints on interpreters: %s, Matching Interpreter: %s" % (constraints, interpreter.binary), V=3) found = True yield interpreter if not found: candidates.append(interpreter) if not found: raise UnsatisfiableInterpreterConstraintsError(constraints, candidates)
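# Illustrative sketch of the contract documented above, reduced to plain values and a
# predicate: matches are yielded lazily, and the error is raised only once the generator is
# fully consumed without any match. All names here are assumptions for the example.
def matching_iter(items, predicate):
    rejected = []
    found = False
    for item in items:
        if predicate(item):
            found = True
            yield item
        elif not found:
            rejected.append(item)
    if not found:
        raise ValueError("No item matched; rejected candidates: {}".format(rejected))

# list(matching_iter([1, 2, 3], lambda n: n % 2 == 0))  -> [2]
# list(matching_iter([1, 3], lambda n: n % 2 == 0))     -> raises ValueError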
def _create_isolated_cmd(cls, binary, args=None, pythonpath=None, env=None): cmd = [binary] # Don't add the user site directory to `sys.path`. # # Additionally, it would be nice to pass `-S` to disable adding site-packages but unfortunately # some python distributions include portions of the standard library there. cmd.append('-s') env = cls._sanitized_environment(env=env) pythonpath = list(pythonpath or ()) if pythonpath: env['PYTHONPATH'] = os.pathsep.join(pythonpath) else: # Turn off reading of PYTHON* environment variables. cmd.append('-E') if args: cmd.extend(args) rendered_command = ' '.join(cmd) if pythonpath: rendered_command = 'PYTHONPATH={} {}'.format( env['PYTHONPATH'], rendered_command) TRACER.log('Executing: {}'.format(rendered_command), V=3) return cmd, env
def run(self, args=(), with_chroot=False, blocking=True, setsid=False, **kwargs): """Run the PythonEnvironment in an interpreter in a subprocess. :keyword args: Additional arguments to be passed to the application being invoked by the environment. :keyword with_chroot: Run with cwd set to the environment's working directory. :keyword blocking: If true, return the return code of the subprocess. If false, return the Popen object of the invoked subprocess. :keyword setsid: If true, run the PEX in a separate operating system session. Remaining keyword arguments are passed directly to subprocess.Popen. """ self.clean_environment() cmdline = self.cmdline(args) TRACER.log('PEX.run invoking %s' % ' '.join(cmdline)) process = Executor.open_process( cmdline, cwd=self._pex if with_chroot else os.getcwd(), preexec_fn=os.setsid if setsid else None, stdin=kwargs.pop('stdin', None), stdout=kwargs.pop('stdout', None), stderr=kwargs.pop('stderr', None), **kwargs) return process.wait() if blocking else process
def iter_pth_paths(filename):
    """Given a .pth file, extract and yield all inner paths without honoring imports.

    This shadows Python's site.py behavior, which is invoked at interpreter startup.
    """
    try:
        f = open(filename, "rU" if PY2 else "r")  # noqa
    except IOError:
        return

    dirname = os.path.dirname(filename)
    known_paths = set()

    with f:
        for i, line in enumerate(f, start=1):
            line = line.rstrip()
            if not line or line.startswith("#"):
                continue
            elif line.startswith(("import ", "import\t")):
                # One important side effect of executing import lines can be alteration of the
                # sys.path directly or indirectly as a programmatic way to add sys.path entries
                # in contrast to the standard .pth mechanism of including fixed paths as
                # individual lines in the file. Here we capture all such programmatic attempts
                # to expand the sys.path and report the additions.
                original_sys_path = sys.path[:]
                try:
                    # N.B.: Setting sys.path to empty is ok since all the .pth files we find and
                    # execute have already been found and executed by our ambient sys.executable
                    # when it started up before running this PEX file. As such, all symbols imported
                    # by the .pth files then will still be available now as cached in sys.modules.
                    sys.path = []
                    exec_function(line, globals_map={})
                    for path in sys.path:
                        yield path
                except Exception as e:
                    # NB: import lines are routinely abused with extra code appended using `;`, so
                    # the class of exceptions that might be raised is broader than ImportError. As
                    # such we catch broadly here.
                    TRACER.log(
                        "Error executing line {linenumber} of {pth_file} with content:\n"
                        "{content}\n"
                        "Error was:\n"
                        "{error}".format(linenumber=i, pth_file=filename, content=line, error=e),
                        V=9,
                    )
                    # Defer error handling to the higher level site.py logic invoked at startup.
                    return
                finally:
                    sys.path = original_sys_path
            else:
                extras_dir, extras_dir_case_insensitive = makepath(dirname, line)
                if extras_dir_case_insensitive not in known_paths and os.path.exists(extras_dir):
                    yield extras_dir
                    known_paths.add(extras_dir_case_insensitive)
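# Illustrative, stdlib-only sketch of the non-import half of the parsing above: read a .pth
# file and yield the existing directories it names, skipping blanks, comments, and import
# lines. A small demo on a temporary file follows; all names here are made up for the example.
import os
import tempfile

def simple_pth_paths(pth_file):
    dirname = os.path.dirname(pth_file)
    with open(pth_file) as fp:
        for line in fp:
            line = line.rstrip()
            if not line or line.startswith("#") or line.startswith(("import ", "import\t")):
                continue
            candidate = os.path.join(dirname, line)
            if os.path.exists(candidate):
                yield candidate

def _demo_simple_pth_paths():
    d = tempfile.mkdtemp()
    os.mkdir(os.path.join(d, "extras"))
    pth = os.path.join(d, "example.pth")
    with open(pth, "w") as fp:
        fp.write("# a comment\nextras\nmissing-dir\n")
    return list(simple_pth_paths(pth))  # -> [os.path.join(d, "extras")]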
def __init__(self, pex, pex_info, interpreter=None, **kw): self._internal_cache = os.path.join(pex, pex_info.internal_cache) self._pex = pex self._pex_info = pex_info self._activated = False self._working_set = None self._interpreter = interpreter or PythonInterpreter.get() self._inherit_path = pex_info.inherit_path self._supported_tags = [] # For the bug this works around, see: https://bitbucket.org/pypy/pypy/issues/1686 # NB: This must be installed early before the underlying pex is loaded in any way. if self._interpreter.identity.abbr_impl == 'pp' and zipfile.is_zipfile( self._pex): self._install_pypy_zipimporter_workaround(self._pex) platform = Platform.current() platform_name = platform.platform super(PEXEnvironment, self).__init__( search_path=[] if pex_info.inherit_path == 'false' else sys.path, # NB: Our pkg_resources.Environment base-class wants the platform name string and not the # pex.platform.Platform object. platform=platform_name, **kw) self._target_interpreter_env = self._interpreter.identity.pkg_resources_env( platform_name) self._supported_tags.extend(platform.supported_tags(self._interpreter)) TRACER.log('E: tags for %r x %r -> %s' % (self.platform, self._interpreter, self._supported_tags), V=9)
def crawl_link(cls, context, link): if link.local: return cls.crawl_local(link) elif link.remote: return cls.crawl_remote(context, link) else: TRACER.log('Failed to crawl %s: unknown scheme %s' % (link.url, link.scheme)) return set(), set()
def crawl_local(cls, link): try: dirents = os.listdir(link.local_path) except OSError as e: TRACER.log('Failed to read %s: %s' % (link.local_path, e), V=1) return set(), set() files, dirs = partition([os.path.join(link.local_path, fn) for fn in dirents], os.path.isdir) return set(map(Link.from_filename, files)), set(map(Link.from_filename, dirs))
def _validate(self): if self._hasher: if self._hash_value != self._hasher.hexdigest(): raise Context.Error('%s failed checksum!' % (self._link.url)) else: TRACER.log('Validated %s (%s)' % (self._link.filename, self._link.fragment), V=3)
def get(cls): for context_class in cls._REGISTRY: try: context = context_class() TRACER.log('Constructed %s context %r' % (context.__class__.__name__, context), V=4) return context except cls.Error: continue raise cls.Error('Could not initialize a request context.')
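# Illustrative sketch of the registry pattern above: try each registered factory in order and
# return the first one that constructs without raising. Plain exceptions stand in for
# Context.Error; all names are assumptions for the example.
def first_constructible(factories, error_type=Exception):
    for factory in factories:
        try:
            return factory()
        except error_type:
            continue
    raise RuntimeError("Could not initialize any of the registered factories.")

# first_constructible([list, dict])  -> []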
def record_unresolved(dist_not_found): # type: (_DistributionNotFound) -> None TRACER.log("Failed to resolve a requirement: {}".format(dist_not_found.requirement)) requirers = unresolved_reqs.get(dist_not_found.requirement) if requirers is None: requirers = OrderedSet() unresolved_reqs[dist_not_found.requirement] = requirers if dist_not_found.required_by: requirers.add(dist_not_found.required_by)
def _activate(self): # type: () -> WorkingSet pex_file = os.path.realpath(self._pex) self._update_candidate_distributions( self._load_internal_cache(pex_file, self._pex_info)) is_zipped_pex = os.path.isfile(pex_file) if not self._pex_info.zip_safe and is_zipped_pex: explode_dir = self._force_local(pex_file=pex_file, pex_info=self._pex_info) # Force subsequent imports to come from the exploded .pex directory rather than the .pex file. TRACER.log( "Adding exploded non zip-safe pex to the head of sys.path: %s" % explode_dir) sys.path[:] = [ path for path in sys.path if pex_file != os.path.realpath(path) ] sys.path.insert(0, explode_dir) self._update_module_paths(pex_file=pex_file) elif not any(pex_file == os.path.realpath(path) for path in sys.path): TRACER.log("Adding pex %s to the head of sys.path: %s" % ("file" if is_zipped_pex else "dir", pex_file)) sys.path.insert(0, pex_file) all_reqs = [ Requirement.parse(req) for req in self._pex_info.requirements ] working_set = WorkingSet([]) resolved = self._resolve(working_set, all_reqs) for dist in resolved: with TRACER.timed("Activating %s" % dist, V=2): working_set.add(dist) if self._inherit_path == InheritPath.FALLBACK: # Prepend location to sys.path. # # This ensures that bundled versions of libraries will be used before system-installed # versions, in case something is installed in both, helping to favor hermeticity in # the case of non-hermetic PEX files (i.e. those with inherit_path=True). # # If the path is not already in sys.path, site.addsitedir will append (not prepend) # the path to sys.path. But if the path is already in sys.path, site.addsitedir will # leave sys.path unmodified, but will do everything else it would do. This is not part # of its advertised contract (which is very vague), but has been verified to be the # case by inspecting its source for both cpython 2.7 and cpython 3.7. sys.path.insert(0, dist.location) else: sys.path.append(dist.location) with TRACER.timed("Adding sitedir", V=2): site.addsitedir(dist.location) return working_set
def crawl_remote(cls, context, link): try: link = context.resolve(link) content = context.content(link) except context.Error as e: TRACER.log('Failed to read %s: %s' % (link.url, e), V=1) return set(), set() links = set(link.join(href) for href in PageParser.links(content)) rel_links = set(link.join(href) for href in PageParser.rel_links(content)) return links, rel_links
def _force_local(self): if self._pex_info.code_hash is None: # Do not support force_local if code_hash is not set. (It should always be set.) return self._pex explode_dir = os.path.join(self._pex_info.zip_unsafe_cache, self._pex_info.code_hash) TRACER.log("PEX is not zip safe, exploding to %s" % explode_dir) with atomic_directory(explode_dir, exclusive=True) as explode_tmp: if not explode_tmp.is_finalized: self.explode_code(explode_tmp.work_dir) return explode_dir
def execute_script(self, script_name): dists = list(self._activate()) dist, entry_point = get_entry_point_from_console_script(script_name, dists) if entry_point: TRACER.log('Found console_script %r in %r' % (entry_point, dist)) sys.exit(self.execute_entry(entry_point)) dist, script_path, script_content = get_script_from_distributions(script_name, dists) if not dist: raise self.NotFound('Could not find script %r in pex!' % script_name) TRACER.log('Found script %r in %r' % (script_name, dist)) return self.execute_content(script_path, script_content, argv0=script_name)
def from_env(cls, hashbang): """Resolve a PythonInterpreter as /usr/bin/env would. :param hashbang: A string, e.g. "python3.3" representing some binary on the $PATH. """ paths = os.getenv('PATH', '').split(':') for path in paths: for fn in cls.expand_path(path): basefile = os.path.basename(fn) if hashbang == basefile: try: return cls.from_binary(fn) except Exception as e: TRACER.log('Could not identify %s: %s' % (fn, e))
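# Illustrative, stdlib-only sketch of resolving a name on $PATH the way /usr/bin/env does,
# via shutil.which. Standalone; this is not pex's interpreter identification, and the helper
# name is an assumption.
import shutil

def which_on_path(name):
    path = shutil.which(name)    # Returns None when nothing on $PATH matches.
    if path is None:
        raise OSError("{} not found on $PATH".format(name))
    return path

# which_on_path("python3")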
def __init__(self, allow_prereleases=None, interpreter=None, platform=None, use_manylinux=None): self._interpreter = interpreter or PythonInterpreter.get() self._platform = self._maybe_expand_platform(self._interpreter, platform) self._allow_prereleases = allow_prereleases platform_name = self._platform.platform self._target_interpreter_env = self._interpreter.identity.pkg_resources_env(platform_name) self._supported_tags = self._platform.supported_tags( self._interpreter, use_manylinux ) TRACER.log( 'R: tags for %r x %r -> %s' % (self._platform, self._interpreter, self._supported_tags), V=9 )
def vendor_runtime(chroot, dest_basedir, label, root_module_names):
    """Includes portions of vendored distributions in a chroot.

    The portion to include is selected by root module name. If the module is a file, only that
    file is included. If the module represents a package, the package and all its sub-packages
    are added recursively.

    :param chroot: The chroot to add vendored code to.
    :type chroot: :class:`pex.common.Chroot`
    :param str dest_basedir: The prefix to store the vendored code under in the ``chroot``.
    :param str label: The chroot label for the vendored code fileset.
    :param root_module_names: The names of the root vendored modules to include in the chroot.
    :type root_module_names: :class:`collections.Iterable` of str
    :raise: :class:`ValueError` if any of the given ``root_module_names`` could not be found
            amongst the vendored code and added to the chroot.
    """
    vendor_module_names = {root_module_name: False for root_module_name in root_module_names}

    for spec in iter_vendor_specs():
        for root, dirs, files in os.walk(spec.target_dir):
            if root == spec.target_dir:
                dirs[:] = [pkg_name for pkg_name in dirs if pkg_name in vendor_module_names]
                files[:] = [mod_name for mod_name in files if mod_name[:-3] in vendor_module_names]
                vendored_names = dirs + files
                if vendored_names:
                    pkg_path = ''
                    for pkg in spec.relpath.split(os.sep):
                        pkg_path = os.path.join(pkg_path, pkg)
                        pkg_file = os.path.join(pkg_path, '__init__.py')
                        src = os.path.join(VendorSpec.ROOT, pkg_file)
                        dest = os.path.join(dest_basedir, pkg_file)
                        if os.path.exists(src):
                            chroot.copy(src, dest, label)
                        else:
                            # We delete `pex/vendor/_vendored/<dist>/__init__.py` when isolating third_party.
                            chroot.touch(dest, label)
                    for name in vendored_names:
                        vendor_module_names[name] = True
                        TRACER.log('Vendoring {} from {} @ {}'.format(name, spec, spec.target_dir), V=3)

            for filename in files:
                if not filename.endswith('.pyc'):  # Sources and data only.
                    src = os.path.join(root, filename)
                    dest = os.path.join(dest_basedir, spec.relpath, os.path.relpath(src, spec.target_dir))
                    chroot.copy(src, dest, label)

    if not all(vendor_module_names.values()):
        raise ValueError('Failed to extract {module_names} from:\n\t{specs}'.format(
            module_names=', '.join(module for module, written in vendor_module_names.items() if not written),
            specs='\n\t'.join('{} @ {}'.format(spec, spec.target_dir) for spec in iter_vendor_specs())))
def matched_interpreters(interpreters, constraints): """Given some filters, yield any interpreter that matches at least one of them. :param interpreters: a list of PythonInterpreter objects for filtering :param constraints: A sequence of strings that constrain the interpreter compatibility for this pex. Each string uses the Requirement-style format, e.g. 'CPython>=3' or '>=2.7,<3' for requirements agnostic to interpreter class. Multiple requirement strings may be combined into a list to OR the constraints, such as ['CPython>=2.7,<3', 'CPython>=3.4']. :return interpreter: returns a generator that yields compatible interpreters """ for interpreter in interpreters: if any(interpreter.identity.matches(filt) for filt in constraints): TRACER.log("Constraints on interpreters: %s, Matching Interpreter: %s" % (constraints, interpreter.binary), V=3) yield interpreter
def maybe_reexec_pex(compatibility_constraints): """ Handle environment overrides for the Python interpreter to use when executing this pex. This function supports interpreter filtering based on interpreter constraints stored in PEX-INFO metadata. If PEX_PYTHON is set in a pexrc, it attempts to obtain the binary location of the interpreter specified by PEX_PYTHON. If PEX_PYTHON_PATH is set, it attempts to search the path for a matching interpreter in accordance with the interpreter constraints. If both variables are present in a pexrc, this function gives precedence to PEX_PYTHON_PATH and errors out if no compatible interpreters can be found on said path. If neither variable is set, we fall back to plain PEX execution using PATH searching or the currently executing interpreter. If compatibility constraints are used, we match those constraints against these interpreters. :param compatibility_constraints: list of requirements-style strings that constrain the Python interpreter to re-exec this pex with. """ if os.environ.pop('SHOULD_EXIT_BOOTSTRAP_REEXEC', None): # We've already been here and selected an interpreter. Continue to execution. return target = None with TRACER.timed('Selecting runtime interpreter based on pexrc', V=3): if ENV.PEX_PYTHON and not ENV.PEX_PYTHON_PATH: # preserve PEX_PYTHON re-exec for backwards compatibility # TODO: Kill this off completely in favor of PEX_PYTHON_PATH # https://github.com/pantsbuild/pex/issues/431 target = _select_pex_python_interpreter(ENV.PEX_PYTHON, compatibility_constraints=compatibility_constraints) elif ENV.PEX_PYTHON_PATH: target = _select_interpreter(pex_python_path=ENV.PEX_PYTHON_PATH, compatibility_constraints=compatibility_constraints) elif compatibility_constraints: # Apply constraints to target using regular PATH target = _select_interpreter(compatibility_constraints=compatibility_constraints) if target and os.path.realpath(target) != os.path.realpath(sys.executable): cmdline = [target] + sys.argv TRACER.log('Re-executing: cmdline="%s", sys.executable="%s", PEX_PYTHON="%s", ' 'PEX_PYTHON_PATH="%s", COMPATIBILITY_CONSTRAINTS="%s"' % (cmdline, sys.executable, ENV.PEX_PYTHON, ENV.PEX_PYTHON_PATH, compatibility_constraints)) ENV.delete('PEX_PYTHON') ENV.delete('PEX_PYTHON_PATH') os.environ['SHOULD_EXIT_BOOTSTRAP_REEXEC'] = '1' os.execve(target, cmdline, ENV.copy())
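# Illustrative sketch of the re-exec guard used above: set a marker variable before replacing
# the current process so the next invocation knows not to re-exec again. The target
# interpreter path and the marker name are assumptions for the example.
import os
import sys

def maybe_reexec(target_python):
    if os.environ.pop("_EXAMPLE_REEXEC_DONE", None):
        return  # Second pass: already running under the selected interpreter.
    if os.path.realpath(target_python) == os.path.realpath(sys.executable):
        return  # Already the selected interpreter; nothing to do.
    env = dict(os.environ, _EXAMPLE_REEXEC_DONE="1")
    os.execve(target_python, [target_python] + sys.argv, env)  # Does not return.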
def find(cls, paths): """ Given a list of files or directories, try to detect python interpreters amongst them. Returns a list of PythonInterpreter objects. """ pythons = [] for path in paths: for fn in cls.expand_path(path): basefile = os.path.basename(fn) if cls._matches_binary_name(basefile): try: pythons.append(cls.from_binary(fn)) except Exception as e: TRACER.log('Could not identify %s: %s' % (fn, e)) continue return pythons
def minimum_sys(cls, inherit_path): """Return the minimum sys necessary to run this interpreter, a la python -S. :returns: (sys.path, sys.path_importer_cache, sys.modules) tuple of a bare python installation. """ site_libs = set(cls.site_libs()) for site_lib in site_libs: TRACER.log('Found site-library: %s' % site_lib) for extras_path in cls._extras_paths(): TRACER.log('Found site extra: %s' % extras_path) site_libs.add(extras_path) site_libs = set(os.path.normpath(path) for path in site_libs) sys_path, sys_path_importer_cache = cls.minimum_sys_path(site_libs, inherit_path) sys_modules = cls.minimum_sys_modules(site_libs) return sys_path, sys_path_importer_cache, sys_modules
def _crawl(self, link_or_links, follow_links): links, seen = set(), set() queue = Queue() converged = threading.Event() def execute(): while not converged.is_set(): try: link = queue.get(timeout=0.01) except Empty: continue if link not in seen: seen.add(link) try: roots, rels = self.crawl_link(self.context, link) except Exception as e: TRACER.log('Unknown exception encountered: %s' % e) for line in traceback.format_exc().splitlines(): TRACER.log(line) queue.task_done() continue links.update(roots) if follow_links: for rel in rels: if rel not in seen: queue.put(rel) queue.task_done() for i, link in enumerate(link_or_links): TRACER.log('crawling link i=%s link=%s follow_links=%s' % (i, link, follow_links), V=3) queue.put(link) workers = [] for _ in range(self._threads): worker = threading.Thread(target=execute) workers.append(worker) worker.daemon = True worker.start() queue.join() converged.set() # We deliberately do not join the worker threads, since they are no longer of any use to us. return links
def _extras_paths(cls): standard_lib = sysconfig.get_python_lib(standard_lib=True) try: makefile = sysconfig.parse_makefile(sysconfig.get_makefile_filename()) except (AttributeError, IOError): # This is not available by default in PyPy's distutils.sysconfig or it simply is # no longer available on the system (IOError ENOENT) makefile = {} extras_paths = filter(None, makefile.get('EXTRASPATH', '').split(':')) for path in extras_paths: yield os.path.join(standard_lib, path) # Handle .pth injected paths as extras. sitedirs = cls._get_site_packages() for pth_path in cls._scan_pth_files(sitedirs): TRACER.log('Found .pth file: %s' % pth_path, V=3) for extras_path in iter_pth_paths(pth_path): yield extras_path
def expand_platform(): expanded_platform = Platform(platform=cur_plat.platform, impl=interpreter.identity.abbr_impl, version=interpreter.identity.impl_ver, abi=interpreter.identity.abi_tag) TRACER.log(""" Modifying given platform of {given_platform!r}: Using the current platform of {current_platform!r} Under current interpreter {current_interpreter!r} To match given interpreter {given_interpreter!r}. Calculated platform: {calculated_platform!r}""".format( given_platform=platform, current_platform=cur_plat, current_interpreter=PythonInterpreter.get(), given_interpreter=interpreter, calculated_platform=expanded_platform), V=9 ) return expanded_platform
def force_local(cls, pex_file, pex_info): if pex_info.code_hash is None: # Do not support force_local if code_hash is not set. (It should always be set.) return pex_file explode_dir = os.path.join(pex_info.zip_unsafe_cache, pex_info.code_hash) TRACER.log('PEX is not zip safe, exploding to %s' % explode_dir) if not os.path.exists(explode_dir): explode_tmp = explode_dir + '.' + uuid.uuid4().hex with TRACER.timed('Unzipping %s' % pex_file): try: safe_mkdir(explode_tmp) with open_zip(pex_file) as pex_zip: pex_files = (x for x in pex_zip.namelist() if not x.startswith(pex_builder.BOOTSTRAP_DIR) and not x.startswith(PexInfo.INTERNAL_CACHE)) pex_zip.extractall(explode_tmp, pex_files) except: # noqa: T803 safe_rmtree(explode_tmp) raise TRACER.log('Renaming %s to %s' % (explode_tmp, explode_dir)) rename_if_empty(explode_tmp, explode_dir) return explode_dir
def minimum_sys_modules(cls, site_libs, modules=None): """Given a set of site-packages paths, return a "clean" sys.modules. When importing site, modules within sys.modules have their __path__'s populated with additional paths as defined by *-nspkg.pth in site-packages, or alternately by distribution metadata such as *.dist-info/namespace_packages.txt. This can possibly cause namespace packages to leak into imports despite being scrubbed from sys.path. NOTE: This method mutates modules' __path__ attributes in sys.modules, so this is currently an irreversible operation. """ modules = modules or sys.modules new_modules = {} for module_name, module in modules.items(): # builtins can stay if not hasattr(module, '__path__'): new_modules[module_name] = module continue # Unexpected objects, e.g. PEP 420 namespace packages, should just be dropped. if not isinstance(module.__path__, list): TRACER.log('Dropping %s' % (module_name,), V=3) continue # Pop off site-impacting __path__ elements in-place. for k in reversed(range(len(module.__path__))): if cls._tainted_path(module.__path__[k], site_libs): TRACER.log('Scrubbing %s.__path__: %s' % (module_name, module.__path__[k]), V=3) module.__path__.pop(k) # It still contains path elements not in site packages, so it can stay in sys.modules if module.__path__: new_modules[module_name] = module return new_modules
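# Illustrative sketch of the __path__ scrubbing above applied to synthetic module objects,
# using only the standard library. `types.ModuleType` stands in for real imported packages;
# the helper name and prefixes are assumptions for the example.
import types

def scrub_module_paths(modules, tainted_prefixes):
    kept = {}
    for name, module in modules.items():
        paths = getattr(module, "__path__", None)
        if paths is None:
            kept[name] = module   # Plain modules and builtins stay as-is.
            continue
        if not isinstance(paths, list):
            continue              # Drop odd __path__ objects (e.g. namespace-package proxies).
        module.__path__ = [p for p in paths
                           if not any(p.startswith(t) for t in tainted_prefixes)]
        if module.__path__:
            kept[name] = module
    return kept

# pkg = types.ModuleType("pkg")
# pkg.__path__ = ["/usr/lib/python3/dist-packages/pkg", "/app/src/pkg"]
# scrub_module_paths({"pkg": pkg}, ["/usr/lib/python3/dist-packages"])  # keeps only /app/src/pkg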
def minimum_sys_path(cls, site_libs, inherit_path):
    scrub_paths = OrderedSet()
    site_distributions = OrderedSet()
    user_site_distributions = OrderedSet()

    def all_distribution_paths(path):
        locations = set(dist.location for dist in find_distributions(path))
        return set([path]) | locations | set(os.path.realpath(path) for path in locations)

    for path_element in sys.path:
        if cls._tainted_path(path_element, site_libs):
            TRACER.log('Tainted path element: %s' % path_element)
            site_distributions.update(all_distribution_paths(path_element))
        else:
            TRACER.log('Not a tainted path element: %s' % path_element, V=2)

    user_site_distributions.update(all_distribution_paths(USER_SITE))

    if inherit_path == 'false':
        scrub_paths = site_distributions | user_site_distributions
        for path in user_site_distributions:
            TRACER.log('Scrubbing from user site: %s' % path)
        for path in site_distributions:
            TRACER.log('Scrubbing from site-packages: %s' % path)

    scrubbed_sys_path = list(OrderedSet(sys.path) - scrub_paths)

    # N.B.: Materialize to a set so the membership tests below work under Python 3, where
    # `filter` would return a one-shot iterator.
    scrub_from_importer_cache = set(
        key for key in sys.path_importer_cache
        if any(key.startswith(path) for path in scrub_paths))
    scrubbed_importer_cache = dict((key, value) for (key, value) in sys.path_importer_cache.items()
                                   if key not in scrub_from_importer_cache)

    for importer_cache_entry in scrub_from_importer_cache:
        TRACER.log('Scrubbing from path_importer_cache: %s' % importer_cache_entry, V=2)

    return scrubbed_sys_path, scrubbed_importer_cache
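# Illustrative sketch of the same scrubbing idea on plain data: drop tainted entries from a
# path list and from a path-to-importer cache. Standalone; the helper name and sample paths
# are assumptions for the example.
def scrub(paths, importer_cache, tainted):
    clean_paths = [p for p in paths if p not in tainted]
    clean_cache = {k: v for k, v in importer_cache.items()
                   if not any(k.startswith(t) for t in tainted)}
    return clean_paths, clean_cache

# scrub(["/app", "/usr/lib/site-packages"],
#       {"/usr/lib/site-packages/x": None},
#       {"/usr/lib/site-packages"})
# -> (["/app"], {})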
def open(self, link): # requests does not support file:// -- so we must short-circuit manually if link.local: return open(link.local_path, 'rb') # noqa: T802 for attempt in range(self._max_retries + 1): try: return StreamFilelike(self._session.get( link.url, verify=self._verify, stream=True, headers={'User-Agent': self.USER_AGENT}, timeout=self._timeout), link) except requests.exceptions.ReadTimeout: # Connect timeouts are handled by the HTTPAdapter, unfortunately read timeouts are not # so we'll retry them ourselves. TRACER.log('Read timeout trying to fetch %s, retrying. %d retries remain.' % ( link.url, self._max_retries - attempt)) except requests.exceptions.RequestException as e: raise self.Error(e) raise self.Error( requests.packages.urllib3.exceptions.MaxRetryError( None, link, 'Exceeded max retries of %d' % self._max_retries))
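# Illustrative sketch of retrying only read timeouts with the third-party `requests` library,
# as the method above does: connect timeouts are retried by the transport, read timeouts are
# not. The URL, retry count, and helper name are assumptions for the example.
import requests

def get_with_read_timeout_retries(url, max_retries=3, timeout=5.0):
    for _ in range(max_retries + 1):
        try:
            return requests.get(url, stream=True, timeout=timeout)
        except requests.exceptions.ReadTimeout:
            continue
        except requests.exceptions.RequestException as e:
            raise RuntimeError(e)
    raise RuntimeError("Exceeded {} retries fetching {}".format(max_retries, url))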
def update_module_paths(cls, pex_file, explode_dir): bootstrap = Bootstrap.locate() pex_path = os.path.realpath(pex_file) # Un-import any modules already loaded from within the .pex file. to_reimport = [] for name, module in reversed(sorted(sys.modules.items())): if bootstrap.imported_from_bootstrap(module): TRACER.log('Not re-importing module %s from bootstrap.' % module, V=3) continue pkg_path = getattr(module, '__path__', None) if pkg_path and any(os.path.realpath(path_item).startswith(pex_path) for path_item in pkg_path): sys.modules.pop(name) to_reimport.append((name, pkg_path, True)) elif name != '__main__': # The __main__ module is special in python and is not re-importable. mod_file = getattr(module, '__file__', None) if mod_file and os.path.realpath(mod_file).startswith(pex_path): sys.modules.pop(name) to_reimport.append((name, mod_file, False)) # Force subsequent imports to come from the exploded .pex directory rather than the .pex file. TRACER.log('Adding to the head of sys.path: %s' % explode_dir) sys.path.insert(0, explode_dir) # And re-import them from the exploded pex. for name, existing_path, is_pkg in to_reimport: TRACER.log('Re-importing %s %s loaded via %r from exploded pex.' % ('package' if is_pkg else 'module', name, existing_path)) reimported_module = importlib.import_module(name) if is_pkg: for path_item in existing_path: # NB: It is not guaranteed that __path__ is a list, it may be a PEP-420 namespace package # object which supports a limited mutation API; so we append each item individually. reimported_module.__path__.append(path_item)