async def strip_source_roots_from_snapshot(
    request: StripSnapshotRequest,
) -> SourceRootStrippedSources:
    """Removes source roots from a snapshot, e.g. `src/python/pants/util/strutil.py` ->
    `pants/util/strutil.py`."""
    if not request.snapshot.files:
        # Nothing to strip: return the empty snapshot with an empty root->files mapping.
        return SourceRootStrippedSources(request.snapshot, FrozenDict())
    if request.representative_path is not None:
        # Fast path: the caller asserts that every file shares the source root of this one
        # representative file, so a single SourceRoot lookup suffices.
        source_root_obj = await Get[SourceRoot](
            SourceRootRequest, SourceRootRequest.for_file(request.representative_path))
        source_root = source_root_obj.path
        if source_root == ".":
            # The repo root is the source root: there is no prefix to remove.
            return SourceRootStrippedSources.for_single_source_root(
                request.snapshot, source_root)
        resulting_snapshot = await Get[Snapshot](RemovePrefix(
            request.snapshot.digest, source_root))
        return SourceRootStrippedSources.for_single_source_root(
            resulting_snapshot, source_root)
    # General path: resolve the source root of every file individually.
    source_roots = await MultiGet(
        Get[SourceRoot](SourceRootRequest, SourceRootRequest.for_file(file))
        for file in request.snapshot.files)
    # MultiGet preserves request order, so zip pairs each file with its own source root.
    file_to_source_root = dict(zip(request.snapshot.files, source_roots))
    # NOTE(review): itertools.groupby only groups *adjacent* items — this relies on
    # snapshot.files being ordered so that files under the same source root are
    # contiguous (sorted paths share their root prefix); confirm snapshot ordering.
    files_grouped_by_source_root = {
        source_root.path: tuple(files)
        for source_root, files in itertools.groupby(
            request.snapshot.files, key=file_to_source_root.__getitem__)
    }
    if len(files_grouped_by_source_root) == 1:
        # All files turned out to share one source root: strip it with one RemovePrefix.
        source_root = next(iter(files_grouped_by_source_root.keys()))
        if source_root == ".":
            return SourceRootStrippedSources.for_single_source_root(
                request.snapshot, source_root)
        resulting_snapshot = await Get[Snapshot](RemovePrefix(
            request.snapshot.digest, source_root))
        return SourceRootStrippedSources.for_single_source_root(
            resulting_snapshot, source_root)
    # Multiple source roots: subset the snapshot per root, strip each root's prefix from
    # its subset, then merge the stripped digests back into a single snapshot.
    snapshot_subsets = await MultiGet(
        Get[Snapshot](SnapshotSubset(request.snapshot.digest, PathGlobs(files)))
        for files in files_grouped_by_source_root.values())
    resulting_digests = await MultiGet(
        Get[Digest](RemovePrefix(snapshot.digest, source_root))
        for snapshot, source_root in zip(snapshot_subsets,
                                         files_grouped_by_source_root.keys()))
    resulting_snapshot = await Get[Snapshot](MergeDigests(resulting_digests))
    return SourceRootStrippedSources(
        resulting_snapshot,
        FrozenDict({
            # `+ 1` skips the path separator after the source root prefix.
            source_root: tuple(file[len(source_root) + 1:] for file in files)
            for source_root, files in files_grouped_by_source_root.items()
        }),
    )
async def strip_sources_paths(sources_paths: SourcesPaths) -> StrippedSourceFileNames:
    """Strip the source root from every file in `sources_paths`.

    Only the first file's source root is resolved, so all files are assumed to share
    that same source root.
    """
    files = sources_paths.files
    if not files:
        return StrippedSourceFileNames()
    source_root = await Get(
        SourceRoot, SourceRootRequest, SourceRootRequest.for_file(files[0])
    )
    root_path = source_root.path
    if root_path == ".":
        # Repo root is the source root: the paths are already "stripped".
        return StrippedSourceFileNames(files)
    return StrippedSourceFileNames(fast_relpath(f, root_path) for f in files)
async def strip_file_name(request: StrippedFileNameRequest) -> StrippedFileName:
    """Strip the source root from a single file path."""
    source_root = await Get(
        SourceRoot, SourceRootRequest, SourceRootRequest.for_file(request.file_path)
    )
    if source_root.path == ".":
        # The repo root is the source root, so there is no prefix to remove.
        return StrippedFileName(request.file_path)
    return StrippedFileName(fast_relpath(request.file_path, source_root.path))
async def resolve_pex_entry_point(request: ResolvePexEntryPointRequest) -> ResolvedPexEntryPoint:
    """Normalize a `pex_binary` entry point field into a `path.to.module[:func]` value.

    `file_name_used` records whether the user supplied a file name (which we convert)
    rather than an already-normalized module path.
    """
    ep_val = request.entry_point_field.value
    if ep_val is None:
        return ResolvedPexEntryPoint(None, file_name_used=False)
    address = request.entry_point_field.address
    # We support several different schemes:
    # 1) `path.to.module` => preserve exactly.
    # 2) `path.to.module:func` => preserve exactly.
    # 3) `app.py` => convert into `path.to.app`.
    # 4) `app.py:func` => convert into `path.to.app:func`.
    # If it's already a module (cases #1 and #2), simply use that. Otherwise, convert the file name
    # into a module path (cases #3 and #4).
    if not ep_val.module.endswith(".py"):
        return ResolvedPexEntryPoint(ep_val, file_name_used=False)
    # Use the engine to validate that the file exists and that it resolves to only one file.
    full_glob = os.path.join(address.spec_path, ep_val.module)
    entry_point_paths = await Get(
        Paths,
        PathGlobs(
            [full_glob],
            glob_match_error_behavior=GlobMatchErrorBehavior.error,
            description_of_origin=f"{address}'s `{request.entry_point_field.alias}` field",
        ),
    )
    # We will have already raised if the glob did not match, i.e. if there were no files. But
    # we need to check if they used a file glob (`*` or `**`) that resolved to >1 file.
    if len(entry_point_paths.files) != 1:
        raise InvalidFieldException(
            softwrap(
                f"""
                Multiple files matched for the `{request.entry_point_field.alias}`
                {ep_val.spec!r} for the target {address}, but only one file expected. Are you using
                a glob, rather than a file name?

                All matching files: {list(entry_point_paths.files)}.
                """
            )
        )
    entry_point_path = entry_point_paths.files[0]
    source_root = await Get(
        SourceRoot,
        SourceRootRequest,
        SourceRootRequest.for_file(entry_point_path),
    )
    # Strip the source root, drop the `.py` extension, and convert path separators to
    # dots: `src/app/main.py` -> `app.main` (for source root `src`).
    stripped_source_path = os.path.relpath(entry_point_path, source_root.path)
    module_base, _ = os.path.splitext(stripped_source_path)
    normalized_path = module_base.replace(os.path.sep, ".")
    return ResolvedPexEntryPoint(
        dataclasses.replace(ep_val, module=normalized_path), file_name_used=True
    )
async def resolve_python_aws_handler(
    request: ResolvePythonAwsHandlerRequest,
) -> ResolvedPythonAwsHandler:
    """Normalize a `python_awslambda` handler field into a `path.to.module:func` value.

    `file_name_used` records whether the user supplied a file name (which we convert)
    rather than an already-normalized module path.
    """
    handler_val = request.field.value
    field_alias = request.field.alias
    address = request.field.address
    path, _, func = handler_val.partition(":")
    # If it's already a module, simply use that. Otherwise, convert the file name into a module
    # path.
    if not path.endswith(".py"):
        return ResolvedPythonAwsHandler(handler_val, file_name_used=False)
    # Use the engine to validate that the file exists and that it resolves to only one file.
    full_glob = os.path.join(address.spec_path, path)
    handler_paths = await Get(
        Paths,
        PathGlobs(
            [full_glob],
            glob_match_error_behavior=GlobMatchErrorBehavior.error,
            description_of_origin=f"{address}'s `{field_alias}` field",
        ),
    )
    # We will have already raised if the glob did not match, i.e. if there were no files. But
    # we need to check if they used a file glob (`*` or `**`) that resolved to >1 file.
    if len(handler_paths.files) != 1:
        raise InvalidFieldException(
            softwrap(
                f"""
                Multiple files matched for the `{field_alias}` {repr(handler_val)} for the target
                {address}, but only one file expected. Are you using a glob, rather than a file
                name?

                All matching files: {list(handler_paths.files)}.
                """
            )
        )
    handler_path = handler_paths.files[0]
    source_root = await Get(
        SourceRoot,
        SourceRootRequest,
        SourceRootRequest.for_file(handler_path),
    )
    # Strip the source root, drop the `.py` extension, and convert path separators to
    # dots to form the module part of `module:func`.
    stripped_source_path = os.path.relpath(handler_path, source_root.path)
    module_base, _ = os.path.splitext(stripped_source_path)
    normalized_path = module_base.replace(os.path.sep, ".")
    return ResolvedPythonAwsHandler(f"{normalized_path}:{func}", file_name_used=True)
async def package_pex_binary(
    field_set: PexBinaryFieldSet,
    pex_binary_defaults: PexBinaryDefaults,
    global_options: GlobalOptions,
) -> BuiltPackage:
    """Build a PEX for a `pex_binary` target.

    When the target sets no `entry_point`, the target must own exactly one source file,
    which is translated into the entry point.
    """
    entry_point = field_set.entry_point.value
    if entry_point is None:
        # No explicit entry point: derive it from the target's single source file.
        binary_source_paths = await Get(
            Paths, PathGlobs, field_set.sources.path_globs(FilesNotFoundBehavior.error))
        if len(binary_source_paths.files) != 1:
            raise InvalidFieldException(
                "No `entry_point` was set for the target "
                f"{repr(field_set.address)}, so it must have exactly one source, but it has "
                f"{len(binary_source_paths.files)}")
        entry_point_path = binary_source_paths.files[0]
        source_root = await Get(
            SourceRoot,
            SourceRootRequest,
            SourceRootRequest.for_file(entry_point_path),
        )
        # Translate the source-root-relative file path into an entry point string.
        entry_point = PexBinarySources.translate_source_file_to_entry_point(
            os.path.relpath(entry_point_path, source_root.path))
    output_filename = field_set.output_path.value_or_default(
        field_set.address,
        file_ending="pex",
        use_legacy_format=global_options.options.pants_distdir_legacy_paths,
    )
    two_step_pex = await Get(
        TwoStepPex,
        TwoStepPexFromTargetsRequest(
            PexFromTargetsRequest(
                addresses=[field_set.address],
                internal_only=False,
                entry_point=entry_point,
                platforms=PexPlatforms.create_from_platforms_field(field_set.platforms),
                output_filename=output_filename,
                additional_args=field_set.generate_additional_args(pex_binary_defaults),
            )),
    )
    return BuiltPackage(two_step_pex.pex.digest, (BuiltPackageArtifact(output_filename),))
async def resolve_pex_entry_point(
        request: ResolvePexEntryPointRequest) -> ResolvedPexEntryPoint:
    """Resolve the entry point for a `pex_binary`, falling back to its single source.

    An explicitly set `entry_point` field is used verbatim. Otherwise, the target must
    own exactly one source file, whose source-root-relative path (minus extension) is
    converted into a dotted module path.
    """
    explicit = request.entry_point_field.value
    if explicit:
        return ResolvedPexEntryPoint(explicit)
    candidate_paths = await Get(
        Paths, PathGlobs, request.sources.path_globs(FilesNotFoundBehavior.error))
    num_files = len(candidate_paths.files)
    if num_files != 1:
        raise InvalidFieldException(
            f"No `entry_point` was set for the target {repr(request.sources.address)}, "
            f"so it must have exactly one source, but it has {num_files}.")
    file_path = candidate_paths.files[0]
    source_root = await Get(
        SourceRoot,
        SourceRootRequest,
        SourceRootRequest.for_file(file_path),
    )
    relative_path = os.path.relpath(file_path, source_root.path)
    module_path, _ = os.path.splitext(relative_path)
    return ResolvedPexEntryPoint(module_path.replace(os.path.sep, "."))
async def get_ancestor_init_py(targets: Targets) -> AncestorInitPyFiles:
    """Find any ancestor __init__.py files for the given targets.

    Includes sibling __init__.py files. Returns the files stripped of their source roots.
    """
    sources = await Get[SourceFiles](AllSourceFilesRequest(
        (tgt.get(Sources) for tgt in targets),
        for_sources_types=(PythonSources,),
        enable_codegen=True,
    ))
    # Find the ancestors of all dirs containing .py files, including those dirs themselves.
    source_dir_ancestors: Set[Tuple[str, str]] = set()  # Items are (src_root, path incl. src_root).
    source_roots = await MultiGet(
        Get[SourceRoot](SourceRootRequest, SourceRootRequest.for_file(path))
        for path in sources.files)
    # MultiGet preserves request order, so zip pairs each file with its own source root.
    for path, source_root in zip(sources.files, source_roots):
        source_dir_ancestor = os.path.dirname(path)
        # Do not allow the repository root to leak (i.e., '.' should not be a package in setup.py).
        while source_dir_ancestor != source_root.path:
            source_dir_ancestors.add((source_root.path, source_dir_ancestor))
            source_dir_ancestor = os.path.dirname(source_dir_ancestor)
    # NOTE(review): list() of a set is only consistent within this call (which is all the
    # zips below require), not across runs.
    source_dir_ancestors_list = list(source_dir_ancestors)  # To force a consistent order.
    # Note that we must MultiGet single globs instead of a single Get for all the globs, because
    # we match each result to its originating glob (see use of zip below).
    ancestor_init_py_snapshots = await MultiGet(
        Get[Snapshot](
            PathGlobs, PathGlobs([os.path.join(source_dir_ancestor[1], "__init__.py")]))
        for source_dir_ancestor in source_dir_ancestors_list)
    source_root_stripped_ancestor_init_pys = await MultiGet(
        Get[Digest](RemovePrefix(snapshot.digest, source_dir_ancestor[0]))
        for snapshot, source_dir_ancestor in zip(ancestor_init_py_snapshots,
                                                 source_dir_ancestors_list))
    return AncestorInitPyFiles(source_root_stripped_ancestor_init_pys)
async def prepare_python_sources(
        request: PythonSourceFilesRequest,
        union_membership: UnionMembership) -> PythonSourceFiles:
    """Gather the Python source files for the given targets, inject any missing
    `__init__` files, and compute the set of source roots covering them."""
    sources = await Get(
        SourceFiles,
        SourceFilesRequest(
            (tgt.get(SourcesField) for tgt in request.targets),
            for_sources_types=request.valid_sources_types,
            enable_codegen=True,
        ),
    )
    # Find __init__.py / __init__.pyi files missing from the snapshot...
    missing_init_files = await Get(
        AncestorFiles,
        AncestorFilesRequest(input_files=sources.snapshot.files,
                             requested=("__init__.py", "__init__.pyi")),
    )
    # ...and merge them into the sources snapshot.
    init_injected = await Get(
        Snapshot,
        MergeDigests((sources.snapshot.digest, missing_init_files.snapshot.digest)))
    # Codegen is able to generate code in any arbitrary location, unlike sources normally being
    # rooted under the target definition. To determine source roots for these generated files, we
    # cannot use the normal `SourceRootRequest.for_target()` and we instead must determine
    # a source root for every individual generated file. So, we re-resolve the codegen sources here.
    python_and_resources_targets = []
    codegen_targets = []
    for tgt in request.targets:
        if tgt.has_field(PythonSourceField) or tgt.has_field(ResourceSourceField):
            python_and_resources_targets.append(tgt)
        elif tgt.get(SourcesField).can_generate(
                PythonSourceField, union_membership) or tgt.get(SourcesField).can_generate(
                    ResourceSourceField, union_membership):
            codegen_targets.append(tgt)
    codegen_sources = await MultiGet(
        Get(
            HydratedSources,
            HydrateSourcesRequest(
                tgt.get(SourcesField),
                for_sources_types=request.valid_sources_types,
                enable_codegen=True,
            ),
        ) for tgt in codegen_targets)
    # Per-target requests for regular targets; per-file requests for generated files.
    source_root_requests = [
        *(SourceRootRequest.for_target(tgt) for tgt in python_and_resources_targets),
        *(SourceRootRequest.for_file(f) for sources in codegen_sources
          for f in sources.snapshot.files),
    ]
    source_root_objs = await MultiGet(
        Get(SourceRoot, SourceRootRequest, req) for req in source_root_requests)
    # Deduplicate: many targets/files typically share the same source root.
    source_root_paths = {source_root_obj.path for source_root_obj in source_root_objs}
    return PythonSourceFiles(
        SourceFiles(init_injected, sources.unrooted_files),
        tuple(sorted(source_root_paths)))
async def resolve_pex_entry_point(
        request: ResolvePexEntryPointRequest) -> ResolvedPexEntryPoint:
    """Normalize the `entry_point` field of a `pex_binary` target.

    Raises `InvalidFieldException` if the field is unset or a supplied file name glob
    matches more than one file.
    """
    ep_val = request.entry_point_field.value
    ep_alias = request.entry_point_field.alias
    address = request.entry_point_field.address
    # TODO: factor up some of this code between python_awslambda and pex_binary once `sources` is
    # removed.
    # This code is tricky, as we support several different schemes:
    #  1) `<none>` or `<None>` => set to `None`.
    #  2) `path.to.module` => preserve exactly.
    #  3) `path.to.module:func` => preserve exactly.
    #  4) `app.py` => convert into `path.to.app`.
    #  5) `app.py:func` => convert into `path.to.app:func`.
    if ep_val is None:
        instructions_url = docs_url(
            "python-package-goal#creating-a-pex-file-from-a-pex_binary-target")
        raise InvalidFieldException(
            f"The `{ep_alias}` field is not set for the target {address}. Run "
            f"`./pants help pex_binary` for more information on how to set the field or "
            f"see {instructions_url}.")
    # Case #1.
    if ep_val.module in ("<none>", "<None>"):
        return ResolvedPexEntryPoint(None)
    # If it's already a module (cases #2 and #3), simply use that. Otherwise, convert the file name
    # into a module path (cases #4 and #5).
    if not ep_val.module.endswith(".py"):
        return ResolvedPexEntryPoint(ep_val)
    # Use the engine to validate that the file exists and that it resolves to only one file.
    full_glob = os.path.join(address.spec_path, ep_val.module)
    entry_point_paths = await Get(
        Paths,
        PathGlobs(
            [full_glob],
            glob_match_error_behavior=GlobMatchErrorBehavior.error,
            description_of_origin=
            f"{address}'s `{request.entry_point_field.alias}` field",
        ),
    )
    # We will have already raised if the glob did not match, i.e. if there were no files. But
    # we need to check if they used a file glob (`*` or `**`) that resolved to >1 file.
    if len(entry_point_paths.files) != 1:
        raise InvalidFieldException(
            f"Multiple files matched for the `{ep_alias}` {ep_val.spec!r} for the target "
            f"{address}, but only one file expected. Are you using a glob, rather than a file "
            f"name?\n\nAll matching files: {list(entry_point_paths.files)}.")
    entry_point_path = entry_point_paths.files[0]
    source_root = await Get(
        SourceRoot,
        SourceRootRequest,
        SourceRootRequest.for_file(entry_point_path),
    )
    # Strip the source root, drop the `.py` extension, and convert path separators to
    # dots: `src/app/main.py` -> `app.main` (for source root `src`).
    stripped_source_path = os.path.relpath(entry_point_path, source_root.path)
    module_base, _ = os.path.splitext(stripped_source_path)
    normalized_path = module_base.replace(os.path.sep, ".")
    return ResolvedPexEntryPoint(
        dataclasses.replace(ep_val, module=normalized_path))
async def resolve_pex_entry_point(
        request: ResolvePexEntryPointRequest) -> ResolvedPexEntryPoint:
    """Normalize the `entry_point` field of a `pex_binary` target, including the
    deprecated short-hand forms that fall back to the target's `sources` field."""
    ep_val = request.entry_point_field.value
    ep_alias = request.entry_point_field.alias
    address = request.entry_point_field.address
    # TODO: factor up some of this code between python_awslambda and pex_binary once `sources` is
    # removed.
    # This code is tricky, as we support several different schemes:
    #  1) `<none>` or `<None>` => set to `None`.
    #  2) `path.to.module` => preserve exactly.
    #  3) `path.to.module:func` => preserve exactly.
    #  4) `app.py` => convert into `path.to.app`.
    #  5) `app.py:func` => convert into `path.to.app:func`.
    #  6) `:func` => if there's a sources field, convert to `path.to.sources:func` (deprecated).
    #  7) no entry point field, but `sources` field => convert to `path.to.sources` (deprecated).
    # Handle deprecated cases #6 and #7, which are the only cases where the `sources` field matters
    # for calculating the entry point.
    if not ep_val or ep_val.startswith(":"):
        binary_source_paths = await Get(
            Paths, PathGlobs, request.sources.path_globs(FilesNotFoundBehavior.error))
        if len(binary_source_paths.files) != 1:
            instructions_url = docs_url(
                "python-package-goal#creating-a-pex-file-from-a-pex_binary-target"
            )
            if not ep_val:
                raise InvalidFieldException(
                    f"The `{ep_alias}` field is not set for the target {address}. Run "
                    f"`./pants help pex_binary` for more information on how to set the field or "
                    f"see {instructions_url}.")
            raise InvalidFieldException(
                f"The `{ep_alias}` field for the target {address} is set to the short-hand value "
                f"{repr(ep_val)}, but the `sources` field is not set. Pants requires the "
                "`sources` field to expand the entry point to the normalized form "
                f"`path.to.module:{ep_val}`. Please either set the `sources` field to exactly one "
                f"file (deprecated) or set `{ep_alias}='my_file.py:{ep_val}'`. See "
                f"{instructions_url}.")
        entry_point_path = binary_source_paths.files[0]
        source_root = await Get(
            SourceRoot,
            SourceRootRequest,
            SourceRootRequest.for_file(entry_point_path),
        )
        # Convert the single source file into a dotted module path; for case #6 the
        # `:func` suffix in `ep_val` is appended directly.
        stripped_source_path = os.path.relpath(entry_point_path, source_root.path)
        module_base, _ = os.path.splitext(stripped_source_path)
        normalized_path = module_base.replace(os.path.sep, ".")
        return ResolvedPexEntryPoint(
            f"{normalized_path}{ep_val}" if ep_val else normalized_path)
    # Case #1.
    if ep_val in ("<none>", "<None>"):
        return ResolvedPexEntryPoint(None)
    path, _, func = ep_val.partition(":")
    # If it's already a module (cases #2 and #3), simply use that. Otherwise, convert the file name
    # into a module path (cases #4 and #5).
    if not path.endswith(".py"):
        return ResolvedPexEntryPoint(ep_val)
    # Use the engine to validate that the file exists and that it resolves to only one file.
    full_glob = os.path.join(address.spec_path, path)
    entry_point_paths = await Get(
        Paths,
        PathGlobs(
            [full_glob],
            glob_match_error_behavior=GlobMatchErrorBehavior.error,
            description_of_origin=
            f"{address}'s `{request.entry_point_field.alias}` field",
        ),
    )
    # We will have already raised if the glob did not match, i.e. if there were no files. But
    # we need to check if they used a file glob (`*` or `**`) that resolved to >1 file.
    if len(entry_point_paths.files) != 1:
        raise InvalidFieldException(
            f"Multiple files matched for the `{ep_alias}` {repr(ep_val)} for the target "
            f"{address}, but only one file expected. Are you using a glob, rather than a file "
            f"name?\n\nAll matching files: {list(entry_point_paths.files)}.")
    entry_point_path = entry_point_paths.files[0]
    source_root = await Get(
        SourceRoot,
        SourceRootRequest,
        SourceRootRequest.for_file(entry_point_path),
    )
    # Strip the source root, drop the `.py` extension, and convert path separators to
    # dots to form the module part of `module:func`.
    stripped_source_path = os.path.relpath(entry_point_path, source_root.path)
    module_base, _ = os.path.splitext(stripped_source_path)
    normalized_path = module_base.replace(os.path.sep, ".")
    return ResolvedPexEntryPoint(
        f"{normalized_path}:{func}" if func else normalized_path)
async def create_python_binary_run_request(
    field_set: PythonBinaryFieldSet,
    python_binary_defaults: PythonBinaryDefaults,
    pex_env: PexEnvironment,
) -> RunRequest:
    """Build a RunRequest that executes a `python_binary` target via a runner PEX.

    The runner PEX contains only third-party requirements; the target's own sources are
    supplied loose via PEX_EXTRA_SYS_PATH, so source edits need no PEX rebuild.
    """
    entry_point = field_set.entry_point.value
    if entry_point is None:
        # No explicit entry point: derive it from the target's single source file.
        binary_source_paths = await Get(
            Paths, PathGlobs, field_set.sources.path_globs(FilesNotFoundBehavior.error))
        if len(binary_source_paths.files) != 1:
            raise InvalidFieldException(
                "No `entry_point` was set for the target "
                f"{repr(field_set.address)}, so it must have exactly one source, but it has "
                f"{len(binary_source_paths.files)}")
        entry_point_path = binary_source_paths.files[0]
        source_root = await Get(
            SourceRoot,
            SourceRootRequest,
            SourceRootRequest.for_file(entry_point_path),
        )
        entry_point = PythonBinarySources.translate_source_file_to_entry_point(
            os.path.relpath(entry_point_path, source_root.path))
    transitive_targets = await Get(TransitiveTargets, Addresses([field_set.address]))
    # Note that we get an intermediate PexRequest here (instead of going straight to a Pex)
    # so that we can get the interpreter constraints for use in runner_pex_request.
    requirements_pex_request = await Get(
        PexRequest,
        PexFromTargetsRequest,
        PexFromTargetsRequest.for_requirements([field_set.address], internal_only=True),
    )
    requirements_request = Get(Pex, PexRequest, requirements_pex_request)
    sources_request = Get(
        PythonSourceFiles,
        PythonSourceFilesRequest(transitive_targets.closure, include_files=True))
    output_filename = f"{field_set.address.target_name}.pex"
    runner_pex_request = Get(
        Pex,
        PexRequest(
            output_filename=output_filename,
            interpreter_constraints=requirements_pex_request.interpreter_constraints,
            additional_args=field_set.generate_additional_args(python_binary_defaults),
            internal_only=True,
            # Note that the entry point file is not in the Pex itself, but on the
            # PEX_PATH. This works fine!
            entry_point=entry_point,
        ),
    )
    # Await the three requests together so they run concurrently.
    requirements, sources, runner_pex = await MultiGet(requirements_request,
                                                       sources_request,
                                                       runner_pex_request)
    merged_digest = await Get(
        Digest,
        MergeDigests([
            requirements.digest, sources.source_files.snapshot.digest, runner_pex.digest
        ]),
    )

    def in_chroot(relpath: str) -> str:
        # "{chroot}" is a placeholder the runner substitutes with the sandbox directory.
        return os.path.join("{chroot}", relpath)

    args = pex_env.create_argv(in_chroot(runner_pex.name), python=runner_pex.python)
    chrooted_source_roots = [in_chroot(sr) for sr in sources.source_roots]
    extra_env = {
        **pex_env.environment_dict(python_configured=runner_pex.python is not None),
        # The requirements PEX rides along on PEX_PATH rather than being merged in.
        "PEX_PATH": in_chroot(requirements_pex_request.output_filename),
        # Loose sources are made importable via PEX_EXTRA_SYS_PATH.
        "PEX_EXTRA_SYS_PATH": ":".join(chrooted_source_roots),
    }
    return RunRequest(digest=merged_digest, args=args, extra_env=extra_env)