def stage(self, directory):
    """Stage the cached image layers into ``directory``.

    Each layer blob is verified against its digest, unpacked into a
    temporary directory (skipping device nodes and anything under
    ``dev/``), then hard-linked into place.

    Raises:
        SourceError: if the manifest cannot be loaded, a blob fails
            verification, or extraction fails.
    """
    mirror_dir = self.get_mirror_directory()

    try:
        manifest = self._load_manifest()
    except (OSError, SourceError) as e:
        raise SourceError("Unable to load manifest: {}".format(e)) from e

    def is_extractable(info):
        # Exclude device nodes and the dev/ tree from extraction
        return not (info.isdev() or info.name.startswith('dev/'))

    try:
        for layer in manifest['layers']:
            digest = layer['digest']
            blob_path = os.path.join(mirror_dir, digest + '.tar.gz')
            self._verify_blob(blob_path, expected_digest=digest)
            with tarfile.open(blob_path) as tar:
                wanted = [m for m in tar.getmembers() if is_extractable(m)]
                with self.tempdir() as staging:
                    tar.extractall(path=staging, members=wanted)
                    link_files(staging, directory)
    except (OSError, SourceError, tarfile.TarError) as e:
        raise SourceError("{}: Error staging source: {}".format(self, e)) from e
def configure(self, node):
    """Parse and validate the docker source configuration from ``node``.

    Raises:
        SourceError: if the deprecated 'url' key is used, or neither
            'ref' nor 'track' is given.
    """
    # 'url' was replaced by 'registry-url' + 'image'; it is still listed
    # as a valid key so validation passes and the clearer error below
    # can be raised instead.
    node.validate_keys(
        ["registry-url", "image", "ref", "track", "url"]
        + Source.COMMON_CONFIG_KEYS)

    if "url" in node:
        raise SourceError(
            "{}: 'url' parameter is now deprecated, "
            "use 'registry-url' and 'image' instead.".format(self))

    self.image = node.get_str("image")
    self.original_registry_url = node.get_str("registry-url", _DOCKER_HUB_URL)
    self.registry_url = self.translate_url(self.original_registry_url)

    self.digest = self._ref_to_digest(node.get_str("ref")) if "ref" in node else None
    self.tag = node.get_str("track", "") or None

    self.architecture = node.get_str("architecture", "") or default_architecture()
    self.os = node.get_str("os", "") or default_os()

    if not (self.digest or self.tag):
        raise SourceError(
            "{}: Must specify either 'ref' or 'track' parameters".format(
                self))

    self.client = DockerRegistryV2Client(self.registry_url)
    self.manifest = None
def stage(self, directory):
    """Stage cached layers into ``directory``, honouring whiteouts.

    For each layer: verify the blob digest, delete the files that the
    layer's whiteout entries mask out, then extract the remaining
    members and hard-link them into ``directory``.

    Raises:
        SourceError: on manifest, verification or extraction failure.
    """
    mirror_dir = self.get_mirror_directory()

    try:
        manifest = self._load_manifest()
    except (OSError, SourceError) as e:
        raise SourceError("Unable to load manifest: {}".format(e)) from e

    try:
        for layer in manifest["layers"]:
            digest = layer["digest"]
            blob_path = os.path.join(mirror_dir, digest + ".tar.gz")
            self._verify_blob(blob_path, expected_digest=digest)

            extract_fileset, white_out_fileset = \
                self._get_extract_and_remove_files(blob_path)

            # Whiteouts: remove files masked out by this layer
            for name in white_out_fileset:
                os.remove(os.path.join(directory, name))

            # Unpack this layer and link its files into place
            with tarfile.open(blob_path, tarinfo=ReadableTarInfo) as tar:
                with self.tempdir() as staging:
                    tar.extractall(path=staging, members=extract_fileset)
                    link_files(staging, directory)
    except (OSError, SourceError, tarfile.TarError) as e:
        raise SourceError("{}: Error staging source: {}".format(self, e)) from e
def _download(self, url):
    """Download ``url`` into the mirror, keyed by its sha256 checksum.

    Sends a conditional request (``If-None-Match``) when an ETag is
    stored for the currently cached sha; an HTTP 304 response means the
    cached file is already what we would have downloaded, so
    ``self.sha`` is returned unchanged.

    Returns:
        str: hex sha256 of the mirrored file.

    Raises:
        SourceError: (temporary) on any network or filesystem failure.
    """
    try:
        with self.cargo.tempdir() as td:
            default_name = os.path.basename(url)
            request = urllib.request.Request(url)
            request.add_header('Accept', '*/*')
            # We do not use etag in case what we have in cache is
            # not matching ref in order to be able to recover from
            # corrupted download.
            if self.sha:
                etag = self._get_etag(self.sha)
                if etag and self.get_consistency() == Consistency.CACHED:
                    request.add_header('If-None-Match', etag)
            with contextlib.closing(
                    urllib.request.urlopen(request)) as response:
                info = response.info()
                # Remember the server's ETag so future fetches can be
                # answered with 304 Not Modified.
                etag = info['ETag'] if 'ETag' in info else None
                # Prefer the server-supplied filename, falling back to
                # the basename of the URL.
                filename = info.get_filename(default_name)
                filename = os.path.basename(filename)
                local_file = os.path.join(td, filename)
                with open(local_file, 'wb') as dest:
                    shutil.copyfileobj(response, dest)
            # Make sure url-specific mirror dir exists.
            os.makedirs(self._get_mirror_dir(), exist_ok=True)
            # Store by sha256sum
            sha256 = utils.sha256sum(local_file)
            # Even if the file already exists, move the new file over.
            # In case the old file was corrupted somehow.
            os.rename(local_file, self._get_mirror_file(sha256))
            if etag:
                self._store_etag(sha256, etag)
            return sha256
    except urllib.error.HTTPError as e:
        if e.code == 304:
            # 304 Not Modified.
            # Because we use etag only for matching sha, currently specified sha is what
            # we would have downloaded.
            return self.sha
        raise SourceError("{}: Error mirroring {}: {}".format(
            self, url, e), temporary=True) from e
    except (urllib.error.URLError,
            urllib.error.ContentTooShortError, OSError) as e:
        raise SourceError("{}: Error mirroring {}: {}".format(
            self, url, e), temporary=True) from e
def assert_safe(member):
    """Reject tar members that would escape the staging area.

    Guards against path traversal: both the member's own extraction
    path and, for hardlinks, the link target must resolve inside
    ``target_dir``.

    Raises:
        SourceError: if the member or its hardlink target escapes.
    """
    final_path = os.path.abspath(os.path.join(target_dir, member.path))
    # A bare startswith(target_dir) would wrongly accept sibling paths
    # such as '<target_dir>-evil/…'; require an exact match or a
    # path-separator boundary after the prefix.
    if not (final_path == target_dir
            or final_path.startswith(target_dir + os.sep)):
        raise SourceError(
            "{}: Tarfile attempts to extract outside the staging area: "
            "{} -> {}".format(self, member.path, final_path))

    if member.islnk():
        linked_path = os.path.abspath(
            os.path.join(target_dir, member.linkname))
        if not (linked_path == target_dir
                or linked_path.startswith(target_dir + os.sep)):
            # Report the offending link target (previously this printed
            # final_path, which is the member path, not the link target)
            raise SourceError(
                "{}: Tarfile attempts to hardlink outside the staging area: "
                "{} -> {}".format(self, member.path, linked_path))
def configure(self, node):
    """Read the ostree source configuration from ``node``.

    Raises:
        SourceError: if neither 'ref' nor 'track' is provided.
    """
    node.validate_keys(
        ["url", "ref", "track", "gpg-key", *Source.COMMON_CONFIG_KEYS]
    )

    self.ostree = None

    self.original_url = node.get_str("url")
    self.url = self.translate_url(self.original_url)
    self.ref = node.get_str("ref", None)
    self.tracking = node.get_str("track", None)
    self.mirror = os.path.join(
        self.get_mirror_directory(),
        utils.url_directory_name(self.original_url),
    )

    # Without at least one of 'ref' and 'track' we could neither
    # track nor build anything.
    if self.ref is None and self.tracking is None:
        raise SourceError(
            "{}: OSTree sources require a ref and/or track".format(self),
            reason="missing-track-and-ref",
        )

    # (optional) Not all repos are signed. But if they are, get the gpg key
    self.gpg_key_path = None
    key_node = node.get_scalar("gpg-key", None)
    if not key_node.is_none():
        self.gpg_key = self.node_get_project_path(
            key_node, check_is_file=True
        )
        self.gpg_key_path = os.path.join(
            self.get_project_directory(), self.gpg_key
        )
def _ensure_mirror(self, skip_ref_check=False):
    """Create or update the local bzr mirror of the tracked branch.

    Raises:
        SourceError: if the configured ref is not present after mirroring
            (unless ``skip_ref_check`` is True).
    """
    mirror_dir = self._get_mirror_dir()
    repo_meta = os.path.join(mirror_dir, ".bzr")
    if not os.path.exists(repo_meta):
        self.call([self.host_bzr, "init-repo", "--no-trees", mirror_dir],
                  fail="Failed to initialize bzr repository")

    branch_dir = os.path.join(mirror_dir, self.tracking)
    branch_url = self.url + "/" + self.tracking
    if os.path.exists(branch_dir):
        # `bzr pull` an existing branch to pick up upstream changes
        self.call(
            [
                self.host_bzr, "pull",
                "--directory={}".format(branch_dir), branch_url
            ],
            fail="Failed to pull new changes for {}".format(branch_dir),
        )
    else:
        # `bzr branch` a missing branch to get the upstream code
        self.call(
            [self.host_bzr, "branch", branch_url, branch_dir],
            fail="Failed to branch from {} to {}".format(
                branch_url, branch_dir),
        )

    if not skip_ref_check and not self._check_ref():
        raise SourceError(
            "Failed to ensure ref '{}' was mirrored".format(self.ref),
            reason="ref-not-mirrored")
def track(self):  # pylint: disable=arguments-differ
    """Track the configured branch; return (ref, tags) or None.

    Raises:
        SourceError: when there is neither a track nor a ref.
    """
    # If self.tracking is not specified it's not an error, just silently return
    if not self.tracking:
        # Without a tracking branch we must at least have a ref
        if self.mirror.ref is None:
            detail = ("Without a tracking branch ref can not be updated. Please "
                      "provide a ref or a track.")
            raise SourceError(
                "{}: No track or ref".format(self),
                detail=detail,
                reason="track-attempt-no-track",
            )
        return None

    # Resolve the URL for the message
    resolved_url = self.translate_url(self.mirror.url)

    with self.timed_activity(
        "Tracking {} from {}".format(self.tracking, resolved_url),
        silent_nested=True,
    ):
        self.mirror._fetch(resolved_url, fetch_all=True)

        ref = self.mirror.to_commit(self.tracking)
        tags = self.mirror.reachable_tags(ref) if self.track_tags else []
        if self.ref_format == _RefFormat.GIT_DESCRIBE:
            ref = self.mirror.describe(ref)

        return ref, tags
def fetch(self, previous_sources_dir):
    """Fetch every source listed in the staged repository file.

    Raises:
        SourceError: when the repo file's checksum does not match the
            stored ref (unless the host bazel generated the file).
    """
    workspace = os.path.join(previous_sources_dir, self.workspace_dir)
    repo_file = os.path.join(workspace, self.repo_file)
    used_host = self._ensure_repo_file(repo_file, workspace)
    assert os.path.isfile(repo_file)

    # Hash the repo file line by line to compare against our ref
    digest = hashlib.sha256()
    with open(repo_file) as repo:
        for line in repo.readlines():
            digest.update(line.encode("utf-8"))

    # We don't check the source if the host bazel was used to generate a
    # repository resolved file, as it is not deterministically generated...
    #
    if used_host:
        self.warn(
            "{}: Not checking ref, using host bazel affects repo file".
            format(self))
    elif digest.hexdigest() != self.get_ref():
        raise SourceError("{}: Ref {} does not match specified {}".format(
            self, digest.hexdigest(), self.get_ref()))

    repo_contents = _import_repo_file(repo_file)
    dist_dir = os.path.join(self._mirror, self._distdir)
    if not os.path.isdir(dist_dir):
        os.makedirs(dist_dir)

    for source in repo_contents:
        self._handle_single_source(source, dist_dir)
def submodule_list(self):
    """Yield (path, url) pairs for each submodule at ``self.ref``."""
    modules = "{}:{}".format(self.ref, GIT_MODULES)
    exit_code, output = self.source.check_output(
        [self.source.host_git, "show", modules], cwd=self.mirror)

    # `git show` exits 128 when there is no .gitmodules file at the
    # given revision — that is not an error, just nothing to yield.
    if exit_code == 128:
        return
    if exit_code != 0:
        raise SourceError(
            "{plugin}: Failed to show gitmodules at ref {ref}".format(
                plugin=self, ref=self.ref))

    # Strip indentation so RawConfigParser accepts the content
    stripped = "\n".join(line.strip() for line in output.splitlines())
    parser = RawConfigParser()
    parser.read_file(StringIO(stripped))

    for section in parser.sections():
        # Only sections of the form 'submodule "name"' describe submodules
        if re.match(r'submodule "(.*)"', section):
            yield (parser.get(section, "path"),
                   parser.get(section, "url"))
def _find_base_dir(self, tar, pattern):
    """Return the first archive path matching ``pattern`` (sorted order).

    Raises:
        SourceError: if no path in the archive matches.
    """
    paths = self._list_tar_paths(tar)
    # sorted() accepts any iterable; no need to materialize a list first
    matches = sorted(utils.glob(paths, pattern))
    if not matches:
        raise SourceError(
            "{}: Could not find base directory matching pattern: {}".format(
                self, pattern))
    return matches[0]
def fetch(self):  # pylint: disable=arguments-differ
    """Download all pinned python packages into the source mirror."""
    with self.tempdir() as tmpdir:
        packages = self.ref.strip().split("\n")
        package_dir = os.path.join(tmpdir, "packages")
        os.makedirs(package_dir)

        command = [
            *self.host_pip,
            "download",
            "--no-binary", ":all:",
            "--index-url", self.index_url,
            "--dest", package_dir,
            *packages,
        ]
        self.call(
            command,
            fail="Failed to install python packages: {}".format(packages),
        )

        # If the mirror directory already exists, assume that some other
        # process has fetched the sources before us and ensure that we do
        # not raise an error in that case.
        try:
            utils.move_atomic(package_dir, self._mirror)
        except utils.DirectoryExistsError:
            # Another process has beaten us and has fetched the sources
            # before us.
            pass
        except OSError as e:
            raise SourceError(
                "{}: Failed to move downloaded pip packages from '{}' to '{}': {}".format(
                    self, package_dir, self._mirror, e
                )
            ) from e
def track(self, previous_sources_dir):
    """Compute a new ref from the staged Cargo.lock and fetch crate shas.

    Returns:
        list: sorted list of {name, version, sha} dicts, one per crate.

    Raises:
        SourceError: if Cargo.lock is missing or malformed.
    """
    lockfile = os.path.join(previous_sources_dir, self.cargo_lock)

    try:
        with open(lockfile, "r") as f:
            try:
                lock = pytoml.load(f)
            except pytoml.core.TomlError as e:
                raise SourceError(
                    "Malformed Cargo.lock file at: {}".format(
                        self.cargo_lock),
                    detail="{}".format(e),
                ) from e
    except FileNotFoundError as e:
        raise SourceError(
            "Failed to find Cargo.lock file at: {}".format(
                self.cargo_lock),
            detail="The cargo plugin expects to find a Cargo.lock file in\n"
            + "the sources staged before it in the source list, but none was found.",
        ) from e

    # FIXME: Better validation would be good here, so we can raise more
    # useful error messages in the case of a malformed Cargo.lock file.
    #
    new_ref = [
        {"name": package["name"], "version": str(package["version"])}
        for package in lock["package"]
        if "source" in package
    ]

    # Make sure the order we set it at track time is deterministic
    new_ref.sort(key=lambda c: (c["name"], c["version"]))

    # Download the crates and get their shas
    for crate_obj in new_ref:
        crate = Crate(self, crate_obj["name"], crate_obj["version"])
        crate_url = crate._get_url()
        with self.timed_activity("Downloading: {}".format(crate_url),
                                 silent_nested=True):
            crate_obj["sha"] = crate._download(crate_url)

    return new_ref
def fetch(self):
    """Fetch the image manifest and all of its layer blobs.

    A cached manifest is reused when present; otherwise it is
    downloaded, checked against the requested digest, and saved.  Each
    gzipped layer blob is then downloaded (when not already mirrored)
    and verified against its digest.

    Raises:
        SourceError: on network errors, digest mismatches, or
            unsupported layer media types.
    """
    with self.timed_activity("Fetching image {}:{} with digest {}".format(
            self.image, self.tag, self.digest), silent_nested=True):
        mirror_dir = self.get_mirror_directory()

        try:
            manifest = self._load_manifest()
        except FileNotFoundError:
            try:
                manifest_text, digest = self.client.manifest(
                    self.image, self.digest)
            except requests.RequestException as e:
                raise SourceError(e) from e

            if digest != self.digest:
                raise SourceError(
                    "Requested image {}, got manifest with digest {}".
                    format(self.digest, digest))
            self._save_manifest(manifest_text)
            manifest = json.loads(manifest_text)
        except DockerManifestError as e:
            self.log("Unexpected manifest", detail=e.manifest)
            raise
        except (OSError, requests.RequestException) as e:
            raise SourceError(e) from e

        for layer in manifest['layers']:
            if layer['mediaType'] != 'application/vnd.docker.image.rootfs.diff.tar.gzip':
                raise SourceError("Unsupported layer type: {}".format(
                    layer['mediaType']))

            layer_digest = layer['digest']
            blob_path = os.path.join(mirror_dir, layer_digest + '.tar.gz')

            if not os.path.exists(blob_path):
                try:
                    self.client.blob(self.image, layer_digest,
                                     download_to=blob_path)
                except (OSError, requests.RequestException) as e:
                    # Clean up a partial download.  The blob is a regular
                    # file, so this must be os.remove() — the previous
                    # shutil.rmtree() call would itself fail with
                    # NotADirectoryError here.
                    if os.path.exists(blob_path):
                        os.remove(blob_path)
                    raise SourceError(e) from e

            self._verify_blob(blob_path, expected_digest=layer_digest)
def configure(self, node):
    """Parse the git-tag source configuration from ``node``.

    Sets up the primary mirror, tracking options, match/exclude filters,
    LFS usage and per-path submodule overrides.

    Raises:
        SourceError: if neither 'ref' nor 'track' is provided.
    """
    ref = self.node_get_member(node, str, 'ref', '') or None
    config_keys = [
        'url', 'track', 'track-tags', 'track-extra', 'ref', 'submodules',
        'checkout-submodules', 'match', 'exclude', 'full-clone', 'use-lfs'
    ]
    self.node_validate(node, config_keys + Source.COMMON_CONFIG_KEYS)

    self.original_url = self.node_get_member(node, str, 'url')
    self.full_clone = self.node_get_member(node, bool, 'full-clone', False)
    self.mirror = GitTagMirror(self, '', self.original_url, ref,
                               primary=True, full_clone=self.full_clone)
    self.tracking = self.node_get_member(node, str, 'track', None)
    self.track_extra = self.node_get_member(node, list, 'track-extra',
                                            default=[])
    self.track_tags = self.node_get_member(node, bool, 'track-tags', False)
    self.use_lfs = self.node_get_member(node, bool, 'use-lfs', None)
    self.match = self.node_get_member(node, list, 'match', [])
    self.exclude = self.node_get_member(node, list, 'exclude', [])

    # At this point we now know if the source has a ref and/or a track.
    # If it is missing both then we will be unable to track or build.
    if self.mirror.ref is None and self.tracking is None:
        raise SourceError(
            "{}: Git sources require a ref and/or track".format(self),
            reason="missing-track-and-ref")

    self.checkout_submodules = self.node_get_member(
        node, bool, 'checkout-submodules', True)
    self.submodules = []

    # Parse a dict of submodule overrides, stored in the submodule_overrides
    # and submodule_checkout_overrides dictionaries.
    self.submodule_overrides = {}
    self.submodule_checkout_overrides = {}
    modules = self.node_get_member(node, Mapping, 'submodules', {})
    for path, _ in self.node_items(modules):
        submodule = self.node_get_member(modules, Mapping, path)
        url = self.node_get_member(submodule, str, 'url', '') or None
        # Make sure to mark all URLs that are specified in the configuration
        if url:
            self.mark_download_url(url, primary=False)
        self.submodule_overrides[path] = url
        if 'checkout' in submodule:
            checkout = self.node_get_member(submodule, bool, 'checkout')
            self.submodule_checkout_overrides[path] = checkout

    self.mark_download_url(self.original_url)
def __do_stage(self, directory: Directory) -> None:
    """Import the local files at ``self.path`` into ``directory``."""
    assert isinstance(directory, Directory)
    with self.timed_activity("Staging local files"):
        outcome = directory.import_files(self.path, properties=["mtime"])
        # Clashes with pre-existing content indicate a broken stage
        if outcome.overwritten or outcome.ignored:
            raise SourceError(
                "Failed to stage source: files clash with existing directory",
                reason="ensure-stage-dir-fail")
def _ensure_repo_file(self, repo_file, workspace):
    """Ensures that the repository resolved file exists.

    Returns whether the host bazel was used or not, for ref-checking
    purposes.
    """
    if os.path.isfile(repo_file):
        return False

    # Warn and drop out early if host bazel is not allowed, or there is no host bazel
    self.warn(
        "{}: Repository file '{}' not found, falling back to host bazel".
        format(self, self.repo_file))
    if not self.allow_host_bazel or not self.host_bazel:
        raise SourceError(
            "{}: No repository resolved file found and cannot fall back to host bazel"
            .format(self))

    # Call a "fetch" with a temporary output base (in an attempt to keep the host clean)
    with self.tempdir() as tmpdir:
        query_command = [
            self.host_bazel,
            "--output_base", tmpdir,
            "cquery",
            "--experimental_repository_resolved_file={}".format(repo_file),
        ] + self.targets
        exit_code, _ = self.check_output(
            query_command,
            fail="Failed to generate repository file with host bazel",
            cwd=workspace,
        )
        self.call(
            [self.host_bazel, "--output_base", tmpdir, "shutdown"],
            fail="Failed to shutdown host bazel",
            cwd=workspace,
        )
        if exit_code != 0:
            raise SourceError(
                "{}: Failed to generate repository file with host bazel".
                format(self))
    return True
def configure(self, node):
    """Read the pip source configuration: index url, packages, requirements.

    Raises:
        SourceError: if neither packages nor requirements files are given.
    """
    node.validate_keys(
        ["url", "packages", "ref", "requirements-files"]
        + Source.COMMON_CONFIG_KEYS)

    self.ref = node.get_str("ref", None)
    self.original_url = node.get_str("url", _PYPI_INDEX_URL)
    self.index_url = self.translate_url(self.original_url)
    self.packages = node.get_str_list("packages", [])
    self.requirements_files = node.get_str_list("requirements-files", [])

    # At least one source of package names must be configured
    if not (self.packages or self.requirements_files):
        raise SourceError(
            "{}: Either 'packages' or 'requirements-files' must be specified".format(self))
def fetch(self, previous_sources_dir):
    """Verify the previous source's file against our ref and mirror it."""
    fpath = os.path.join(previous_sources_dir, "file")

    # Verify that the checksum of the file from previous source matches
    # our ref
    with open(fpath, "rb") as f:
        actual = hashlib.sha256(f.read()).hexdigest()
    if actual != self.ref.strip():
        raise SourceError("Element references do not match")

    # Copy "file" as "filetransform"
    utils.safe_copy(fpath, os.path.join(self.mirror, "filetransform"))
def fetch(self, alias_override=None):
    """Record a simulated fetch attempt and fail when configured to.

    Appends a line describing the attempt to the plugin's output file,
    then raises SourceError unless ``fetch_succeeds`` marks the
    translated URL as successful.
    """
    url = self.source.translate_url(self.original_url,
                                    alias_override=alias_override,
                                    primary=self.primary)
    with open(self.source.output_file, "a") as f:
        # dict.get() replaces the membership-test-then-index idiom
        # (`url in d and d[url]`) with a single lookup.
        success = self.source.fetch_succeeds.get(url, False)
        message = "Fetch {} {} from {}\n".format(
            self.original_url,
            "succeeded" if success else "failed", url)
        f.write(message)
        if not success:
            raise SourceError("Failed to fetch {}".format(url))
def __do_stage(self, directory):
    """Import the configured path (file or directory) into ``directory``."""
    with self.timed_activity("Staging local files into CAS"):
        # A symlink is imported as a single file even when it points
        # at a directory.
        is_real_dir = (os.path.isdir(self.fullpath)
                       and not os.path.islink(self.fullpath))
        if is_real_dir:
            result = directory.import_files(self.fullpath)
        else:
            result = directory.import_single_file(self.fullpath)

        if result.overwritten or result.ignored:
            raise SourceError(
                "Failed to stage source: files clash with existing directory",
                reason="ensure-stage-dir-fail")
def configure(self, node):
    """Configure filename/executable options for a downloaded file.

    Raises:
        SourceError: if 'filename' contains a directory separator.
    """
    super().configure(node)

    # Validate the keys before reading any values, consistently with
    # the other configure() implementations in this codebase (the
    # original read values first and validated last).
    node.validate_keys(DownloadableFileSource.COMMON_CONFIG_KEYS
                       + ["filename", "executable"])

    self.filename = node.get_str("filename", os.path.basename(self.url))
    self.executable = node.get_bool("executable", default=False)

    # A filename with path separators could escape the staging root
    if os.sep in self.filename:
        raise SourceError(
            "{}: filename parameter cannot contain directories".format(
                self),
            reason="filename-contains-directory")
def stage(self, directory):
    """Apply the quilt patch series found under ``self.path``."""
    patch_dir = os.path.join(directory, self.path)
    with self.timed_activity(
            "quilt: Applying patches: {}".format(patch_dir)):
        if not os.path.isdir(patch_dir):
            raise SourceError(
                "Directory does not exist '{}'".format(patch_dir),
                reason="no-dir-found")

        # Call quilt command
        self.command = [self.host_quilt, "push", "-a"]
        self.call(self.command,
                  cwd=patch_dir,
                  fail="Error occurred while calling {}".format(
                      self.command))
def _load_manifest(self):
    """Load the cached manifest for ``self.digest`` and verify its hash.

    Raises:
        SourceError: if the cached manifest's content hash does not
            match the expected digest.
    """
    manifest_file = os.path.join(self.get_mirror_directory(),
                                 self.digest + ".manifest.json")

    with open(manifest_file, "rb") as f:
        text = f.read()

    # Refuse a manifest whose content hash no longer matches
    real_digest = self.client.digest(text)
    if real_digest != self.digest:
        raise SourceError(
            "Manifest {} is corrupt; got content hash of {}".format(
                manifest_file, real_digest))

    return json.loads(text.decode("utf-8"))
def parse_bearer_authorization_challenge(text):
    """Parse a ``Www-Authenticate: Bearer ...`` challenge header.

    Returns a dict mapping challenge keys (e.g. 'realm', 'service') to
    their unquoted values.

    Raises:
        SourceError: if the header is not a Bearer challenge.
    """
    # Hand-written and probably broken parsing of the Www-Authenticate
    # response. I can't find a built-in way to parse this, but I probably
    # didn't look hard enough.
    if not text.startswith("Bearer "):
        # The message previously contained a stray '%' left over from
        # %-style formatting ("... response: %{}").
        raise SourceError(
            "Unexpected Www-Authenticate response: {}".format(text))
    pairs = {}
    text = text[len("Bearer "):]
    for pair in text.split(","):
        # Split on the first '=' only so values containing '='
        # (e.g. base64 payloads) survive intact.
        key, value = pair.split("=", 1)
        pairs[key] = value[1:-1]
    return pairs
def preflight(self):
    """Locate a host python whose pip supports the 'download' command.

    Raises:
        SourceError: when no suitable pip is found on the host.
    """
    self.host_pip = None
    # Prefer the newest python first
    for python in reversed(_PYTHON_VERSIONS):
        try:
            interpreter = utils.get_host_tool(python)
            exit_code = self.call(
                [interpreter, "-m", "pip", "download", "--help"])
            if exit_code == 0:
                self.host_pip = [interpreter, "-m", "pip"]
                break
        except utils.ProgramNotFoundError:
            # This python isn't installed; try the next one
            pass

    if self.host_pip is None:
        raise SourceError(
            "{}: Unable to find a suitable pip command".format(self))
def configure(self, node):
    """Parse the docker source configuration (legacy node API variant).

    Raises:
        SourceError: if the deprecated 'url' key is used, or neither
            'ref' nor 'track' is given.
    """
    # 'url' is deprecated but still listed as a valid key so that the
    # clearer error below is raised instead of a validation failure.
    self.node_validate(node,
                       ['registry-url', 'image', 'ref', 'track', 'url']
                       + Source.COMMON_CONFIG_KEYS)
    if 'url' in node:
        raise SourceError(
            "{}: 'url' parameter is now deprecated, "
            "use 'registry-url' and 'image' instead.".format(self))

    self.image = self.node_get_member(node, str, 'image')
    self.original_registry_url = self.node_get_member(
        node, str, 'registry-url', _DOCKER_HUB_URL)
    self.registry_url = self.translate_url(self.original_registry_url)

    if 'ref' in node:
        self.digest = self._ref_to_digest(
            self.node_get_member(node, str, 'ref'))
    else:
        self.digest = None
    self.tag = self.node_get_member(node, str, 'track', '') or None

    self.architecture = (self.node_get_member(node, str, 'architecture', '')
                         or default_architecture())
    self.os = self.node_get_member(node, str, 'os', '') or default_os()

    if not (self.digest or self.tag):
        raise SourceError(
            "{}: Must specify either 'ref' or 'track' parameters".format(
                self))

    self.client = DockerRegistryV2Client(self.registry_url)
    self.manifest = None
def track(self):
    """Return a new ref for the tracked tag, or None if no tag is set."""
    # If the tracking ref is not specified it's not an error, just silently return
    if not self.tag:
        return None

    with self.timed_activity(
            "Fetching image manifest for image: '{}:{}' from: {}".format(
                self.image, self.tag, self.registry_url)):
        try:
            # Only the digest is needed here; the manifest body is discarded
            _, digest = self.client.manifest(self.image, self.tag)
        except DockerManifestError as e:
            self.log("Problem downloading manifest", detail=e.manifest)
            raise
        except (OSError, requests.RequestException) as e:
            raise SourceError(e) from e

    return self._digest_to_ref(digest)
def stage(self, directory):
    """Apply the local patch to the staged sources in ``directory``."""
    with self.timed_activity("Applying local patch: {}".format(self.path)):
        # Bail out with a comprehensive message if the target directory is empty
        if not os.listdir(directory):
            raise SourceError(
                "Nothing to patch in directory '{}'".format(directory),
                reason="patch-no-files")

        command = [
            self.host_patch,
            "-p{}".format(self.strip_level),
            "-i", self.fullpath,
            "-d", directory,
        ]
        self.call(command,
                  fail="Failed to apply patch {}".format(self.path))
def fetch(self):  # pylint: disable=arguments-differ
    """Download the file and verify its checksum against ``self.ref``."""
    # Just a defensive check, it is impossible for the
    # file to be already cached because Source.fetch() will
    # not be called if the source is already cached.
    #
    if self.is_cached():
        return  # pragma: nocover

    # Download the file, raise hell if the sha256sums don't match,
    # and mirror the file otherwise.
    with self.timed_activity("Fetching {}".format(self.url),
                             silent_nested=True):
        actual_sha = self._ensure_mirror()
        if actual_sha != self.ref:
            raise SourceError(
                "File downloaded from {} has sha256sum '{}', not '{}'!".
                format(self.url, actual_sha, self.ref))