def _process_source_code(self, source_location, srcdir, import_path=None, excludes=None):
    """Run retrodep on the source code and parse its output.

    :param SourceLocation source_location: local source code DB node
    :param str srcdir: path to source code to examine
    :param str/None import_path: import path for top-level module
    :param list/None excludes: list of globs to ignore
    """
    stdout, stderr = self._run_retrodep(srcdir, import_path=import_path, excludes=excludes)

    # Parse the output from retrodep.
    for line in stdout.splitlines():
        fields = line.split('\t')
        if len(fields) != 4:
            log.error(f'invalid retrodep output: {line}')
            continue

        mod, ver, repo, rev = fields

        # The module field begins with an asterisk for a top-level module
        srctype = self.VENDORED
        if mod.startswith('*'):
            mod = mod[1:]
            srctype = self.UPSTREAM

        self._process_go_module(source_location, srctype, mod, ver, repo, rev)
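For reference, the loop above consumes one tab-separated record per Go module: module path, version, repository URL, and revision, with a leading asterisk marking the top-level module. A minimal sketch of that format, with invented module names, versions, and hashes:

example_stdout = (
    '*example.com/myapp\tv1.0.0\thttps://example.com/myapp\t8f00b4\n'
    'github.com/pkg/errors\tv0.8.1\thttps://github.com/pkg/errors\t059132\n'
)
for line in example_stdout.splitlines():
    mod, ver, repo, rev = line.split('\t')
    if mod.startswith('*'):
        print('top-level (upstream):', mod[1:], ver, repo, rev)
    else:
        print('vendored:            ', mod, ver, repo, rev)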
def _get_rpms_diff(self, parent_archive_id, child_archive_id):
    """
    Determine the RPMs installed in the "child" container image layer.

    :param int parent_archive_id: the archive ID of the parent container image layer
    :param int child_archive_id: the archive ID of the child container image layer
    :return: a list of the RPMs (Koji RPM info dictionaries) installed in the child
        container image layer
    :rtype: list
    """
    parent_rpm_ids = set()
    for rpm in self.koji_session.listRPMs(imageID=parent_archive_id):
        parent_rpm_ids.add(rpm['id'])

    child_rpm_ids = set()
    id_to_rpm = {}
    image_rpm_file = self.read_metadata_file(self.IMAGE_RPM_FILE)
    rpms = image_rpm_file.get(str(child_archive_id), [])
    if not rpms:
        log.error(f'No RPM files found in IMAGE_RPM_FILE for archive ID {child_archive_id}')
    for rpm in rpms:
        id_to_rpm[rpm['id']] = rpm
        child_rpm_ids.add(rpm['id'])

    diff_rpms = []
    for rpm_id in (child_rpm_ids - parent_rpm_ids):
        diff_rpms.append(id_to_rpm[rpm_id])
    return diff_rpms
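The set arithmetic above is the whole trick: any RPM in the child image's list that is not in the parent's belongs to the child layer. A self-contained sketch with invented RPM info dictionaries:

parent_rpms = [{'id': 1, 'nvr': 'bash-5.0-1.el8'}, {'id': 2, 'nvr': 'glibc-2.28-1.el8'}]
child_rpms = parent_rpms + [{'id': 3, 'nvr': 'httpd-2.4.37-1.el8'}]

parent_ids = {rpm['id'] for rpm in parent_rpms}
id_to_rpm = {rpm['id']: rpm for rpm in child_rpms}
diff = [id_to_rpm[rpm_id] for rpm_id in id_to_rpm.keys() - parent_ids]
assert diff == [{'id': 3, 'nvr': 'httpd-2.4.37-1.el8'}]  # only the layer's own RPM remains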
def run(self): """ Start the container RPM analyzer. :raises AnalysisFailure: if the analyzer completed with errors """ build_info = self.read_metadata_file(self.BUILD_FILE) build_id = build_info['id'] if build_info['type'] != self.CONTAINER_BUILD_TYPE: log.info( f'Skipping build {build_id} because the build is not a container' ) return # Create a mapping of arch to archive (container image) so we can easily map to the # parent container archives in a future loop arch_to_archive = {} not_container_msg = 'Skipping archive {0} since it\'s not a container image' for archive in self.read_metadata_file(self.ARCHIVE_FILE): if not self.is_container_archive(archive): log.debug(not_container_msg.format(archive['id'])) continue arch = archive['extra']['image']['arch'] if arch in arch_to_archive: log.error( f'Build {build_id} has more than one container image with the arch {arch}' ) continue arch_to_archive[arch] = archive parent_build_id = build_info['extra']['image'].get('parent_build_id') # If there is a parent to this image, then only get the RPMs installed in this layer # and mark them as embedded artifacts on this container image if parent_build_id is not None: # Find the RPMs installed in this layer versus the parent image for archive in self.koji_session.listArchives(parent_build_id): if not self.is_container_archive(archive): log.debug(not_container_msg.format(archive['id'])) continue arch = archive['extra']['image']['arch'] if arch not in arch_to_archive: log.debug( f'The parent build {parent_build_id} contains an extra arch of {arch}' ) continue rpms = self._get_rpms_diff(archive['id'], arch_to_archive[arch]['id']) self._process_embedded_rpms(arch_to_archive[arch], rpms) # If there is no parent, then this is a base image. Just get all the RPMs installed in # the image and mark them as embedded artifacts in this container image. else: image_rpm_file = self.read_metadata_file(self.IMAGE_RPM_FILE) for archive in arch_to_archive.values(): rpms = image_rpm_file.get(str(archive['id'])) self._process_embedded_rpms(archive, rpms) # Claim all files from installed RPMs. self._claim_rpm_files(arch_to_archive.values())
def _create_or_update_parent(self, build_id):
    """Create or update a parent build and its archives (container images).

    :param build_id: build ID of the parent build to process
    :return: dictionary of container image artifacts indexed by architectures
    :rtype: dict
    """
    parent_build = content.Build.get_or_create({
        'id_': build_id,
        'type_': 'buildContainer',
    })[0]

    archives = self.koji_session.listArchives(build_id)
    arch_to_artifact = {}
    not_container_msg = 'Skipping archive {0} since it\'s not a container image'
    for archive in archives:
        if archive['btype'] != 'image':
            log.debug(not_container_msg.format(archive['id']))
            continue

        architecture = archive['extra']['image']['arch']
        if architecture in arch_to_artifact:
            log.error(
                f'Build {build_id} has more than one container image with the arch '
                f'{architecture}')
            continue

        # Create or get the archive artifact that is the product of this build
        artifact = self.create_or_update_archive_artifact_from_archive_info(archive)
        arch_to_artifact[artifact.architecture] = artifact

        # If an archive was created in the previous step, connect it to this build.
        if not artifact.build.is_connected(parent_build):
            artifact.build.connect(parent_build)

    return arch_to_artifact
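A hypothetical usage sketch; `analyzer` stands in for a constructed analyzer instance and 12000 for a real Koji build ID, neither of which appears in the source:

arch_to_artifact = analyzer._create_or_update_parent(12000)
# e.g. {'x86_64': <Artifact ...>, 's390x': <Artifact ...>}: one container image
# artifact per architecture, each already connected to the parent Build node
for arch, artifact in arch_to_artifact.items():
    print(arch, artifact)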
def run(self): """ Start the container analyzer. :raises AnalysisFailure: if the analyzer completed with errors """ build_info = self.read_metadata_file(self.BUILD_FILE) build_id = build_info['id'] if build_info['type'] != self.CONTAINER_BUILD_TYPE: log.info( f'Skipping build {build_id} because the build is not a container' ) return # If this build has no parent image build, there is nothing to do here. parent_build_id = build_info['extra']['image'].get('parent_build_id') if parent_build_id is None: return # This container's build is assumed to exist since it is created by the main analyzer. build = content.Build.nodes.get(id_=build_id) # Process parent build and embed all artifacts of the parent build to the artifacts of # this build's artifacts. arch_to_artifact = self._create_or_update_parent(parent_build_id) for archive in build.artifacts.filter(type_='container').all(): related_archive = arch_to_artifact.get(archive.architecture) if not related_archive: log.error( 'no artifact to link to, architecture does not exist in parent build' ) continue archive.embedded_artifacts.connect(related_archive) image_info = build_info['extra']['image'] try: parent_image_builds = image_info['parent_image_builds'].values() # Process parent builds used as buildroots (those specified in `parent_image_builds` # besides the `parent_build_id`. Embed all artifacts of each parent build as buildroot # artifacts of this build's artifacts. parent_image_builds_ids = { build['id'] for build in parent_image_builds if build['id'] != parent_build_id } except KeyError: # Older builds had different metadata in the extra field. parent_image_builds_ids = [image_info['parent_build_id']] for buildroot_parent_build_id in parent_image_builds_ids: arch_to_artifact = self._create_or_update_parent( buildroot_parent_build_id) for archive in build.artifacts.filter(type_='container').all(): related_archive = arch_to_artifact.get(archive.architecture) if not related_archive: log.error( 'no artifact to link to, architecture does not exist in parent build' ) continue archive.buildroot_artifacts.connect(related_archive)
def execute_batch_and_return_artifacts(self, claim=False):
    """
    Execute the stored Koji batch and return the Artifacts created.

    :param bool claim: whether to claim the file when an artifact is discovered.
        Defaults to False.
    :return: a list of the Artifacts created
    :rtype: list
    """
    ret = []
    if not self.batch:
        return ret  # gracefully exit early if the batch is empty

    responses = self.koji_session.multiCall()

    # Process the individual responses. Responses are returned in the same order the
    # calls were added, so we can zip them up to pair each response back with its file
    # path.
    for (path_to_archive, relative_filepath), response in zip(self.batch, responses):
        archive = os.path.basename(path_to_archive)
        is_rpm = relative_filepath.endswith('.rpm')
        # If Koji could not find it or there was some other error, log it and continue.
        # The response is a dict on error, or a list of one element when found.
        if isinstance(response, dict):
            log.error(
                f'Error received from Koji looking up {relative_filepath} embedded in '
                f'{archive} in build {self.build_id}. Koji error '
                f'{response["faultString"]}')
            continue

        artifact_info = response[0]
        if not artifact_info:
            log.info(f'Cannot find build for {relative_filepath} embedded in {archive} '
                     f'in build {self.build_id}.')
            continue

        if not is_rpm:
            # listArchives returns a list, whereas getRPM returns the info dict directly
            artifact_info = artifact_info[0]

        artifact_build_id = artifact_info.get('build_id')
        if not artifact_build_id:
            log.error(f'Empty build found in Koji for {relative_filepath} embedded in '
                      f'{archive} in build {self.build_id}')
            continue

        log.info(f'Linking discovered embedded artifact {relative_filepath} embedded in '
                 f'{archive} in build {self.build_id}')

        artifact_build = content.Build.get_or_create({
            'id_': artifact_build_id,
            'type_': 'build' if is_rpm else artifact_info['btype'],  # TODO bug!
        })[0]

        if is_rpm:
            artifact = self.create_or_update_rpm_artifact_from_rpm_info(artifact_info)
        else:
            artifact = self.create_or_update_archive_artifact_from_archive_info(
                artifact_info)

        self.conditional_connect(artifact.build, artifact_build)
        ret.append(artifact)
        if claim:
            self.claim_file(path_to_archive, relative_filepath)

    # Clear the processed batch.
    self.batch = []
    return ret
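The branching above follows Koji's multicall conventions: each response is either a fault dict or a single-element list wrapping the call's normal return value. A sketch of the three shapes the loop distinguishes, with invented field values:

error_response = {'faultCode': 1000, 'faultString': 'Invalid filename'}
rpm_response = [{'id': 42, 'build_id': 12345, 'arch': 'x86_64'}]       # getRPM result
archive_response = [[{'id': 7, 'build_id': 54321, 'btype': 'maven'}]]  # listArchives result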
def run(self): """ Start the container Go analyzer. :raises AnalysisFailure: if the analyzer completed with errors """ # Check we have access to the executables we need. assert_command(self.RETRODEP) assert_command(self.GOVERSION) build_info = self.read_metadata_file(self.BUILD_FILE) build_id = build_info['id'] if build_info['type'] != self.CONTAINER_BUILD_TYPE: log.info( f'Skipping build {build_id} because the build is not a container' ) return # This container's build is assumed to exist since it is # created by the main analyzer. build = content.Build.nodes.get(id_=build_id) source_locations = build.source_location.all() try: source_location = source_locations[0] except IndexError: msg = f'Missing source location for container build {build_id}' log.error(msg) raise AnalysisFailure(msg) srcdir = os.path.join(self.input_dir, self.SOURCE_DIR) # Store the failure messages so they can be returned in an AnalysisFailure exception failures = [] failed_src_exc_msg = 'Failed while processing the source in "{}"' failed_src_msg = 'Failed while processing the source in "{}" with "{}"' # First process the source code that's directly available in # the dist-git repository. try: self._process_git_source(source_location, srcdir) except RuntimeError as error: log.exception(failed_src_exc_msg.format(srcdir)) failures.append(failed_src_msg.format(srcdir, error)) # Next process source code from archives (from 'rhpkg sources'). # Look for tar archives and zip archives. tar_archives = glob(os.path.join(srcdir, '*.tar.*')) zip_archives = glob(os.path.join(srcdir, '*.zip')) archives = [(unpack_tar, archive) for archive in tar_archives] archives += [(unpack_zip, archive) for archive in zip_archives] for unpack, archive in archives: with tempfile.TemporaryDirectory() as subsrc: unpack(archive, subsrc) try: self._process_source_code(source_location, subsrc) except RuntimeError as error: log.exception(failed_src_exc_msg.format(srcdir)) failures.append(failed_src_msg.format(subsrc, error)) # Now claim all the Go executables. self._claim_go_executables() if failures: raise AnalysisFailure( 'GoAnalyzer completed with the following error(s): \n {}'. format("\n ".join(failures)))