def _claim_go_executables(self):
    """Claim executables identified by goversion."""
    not_container_msg = 'Skipping archive {0} since it\'s not a container image'
    archives = self.read_metadata_file(self.ARCHIVE_FILE)
    for index, archive in enumerate(archives):
        if not self.is_container_archive(archive):
            log.debug(not_container_msg.format(archive['id']))
            continue

        layer_dir = os.path.join(self.input_dir, self.UNPACKED_CONTAINER_LAYER_DIR,
                                 archive['filename'])
        cmd = [self.GOVERSION, '.']
        log.info(f'Running {cmd}')
        gv = subprocess.Popen(cmd, cwd=layer_dir, universal_newlines=True,
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        (stdout, stderr) = gv.communicate()
        returncode = gv.wait()
        if returncode:
            raise RuntimeError(f'The command "{" ".join(cmd)}" failed with: {stderr}')

        for line in stdout.splitlines():
            path, _ = line.split(' ', 1)
            log.info(f'(archive {index+1}/{len(archives)}) Claiming {path}')
            self.claim_container_file(archive, path)

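# A minimal, self-contained sketch (separate from the analyzer) of the output
# parsing above. It assumes `goversion .` prints one "path version" pair per
# line, e.g. "usr/bin/example go1.12.5"; the exact output format is an
# assumption here, not confirmed by this excerpt.
def parse_goversion_output(stdout):
    """Return the executable paths from hypothetical goversion output."""
    paths = []
    for line in stdout.splitlines():
        # Split on the first space only: the remainder of the line is the
        # detected Go version and is not needed for claiming.
        path, _ = line.split(' ', 1)
        paths.append(path)
    return paths


assert parse_goversion_output('usr/bin/example go1.12.5\n') == ['usr/bin/example']
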
def _get_import_path_override(self, srcdir):
    """Look inside the Dockerfile for a named label.

    :param srcdir: path to source code to examine
    :return: import path override, or None
    :rtype: str or None
    """
    label = 'io.openshift.source-repo-url'
    try:
        df = DockerfileParser(srcdir, cache_content=True)
    except IOError:
        log.exception('Unable to read Dockerfile')
        return None

    try:
        repo = df.labels[label]
    except KeyError:
        log.debug(f'No {label} label in Dockerfile')
        return None
    except:  # noqa:E722
        log.exception('Failed to process Dockerfile; ignoring')
        return None

    # Convert it to an import path by stripping off the scheme.
    (_, _, import_path) = repo.rpartition('://')
    if not import_path:
        return None
    return import_path

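# A quick illustration with assumed label values (not from the analyzer) of the
# scheme stripping above. Note that rpartition leaves a scheme-less value
# intact; only an empty result makes the method return None.
assert 'https://github.com/org/repo'.rpartition('://')[2] == 'github.com/org/repo'
assert 'github.com/org/repo'.rpartition('://')[2] == 'github.com/org/repo'
assert ''.rpartition('://')[2] == ''
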
def _read_and_save_buildroots(self):
    """Save and link the rpms used in the buildroot for each artifact."""
    buildroots_info = self.read_metadata_file(self.BUILDROOT_FILE)
    for buildroot_id, buildroot_info in buildroots_info.items():
        log.debug('Creating artifacts for buildroot %s', buildroot_id)
        for rpm_info in buildroot_info:
            rpm = self.create_or_update_rpm_artifact_from_rpm_info(rpm_info)
            if buildroot_id not in self._buildroot_to_artifact:
                continue
            for artifact in self._buildroot_to_artifact[buildroot_id]:
                artifact.buildroot_artifacts.connect(rpm)

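# _map_buildroot_to_artifact and _buildroot_to_artifact are not shown in this
# excerpt. A plausible sketch of the pairing, assuming the mapping is a plain
# dict of lists populated while artifacts are created and then consumed above:
def _map_buildroot_to_artifact(self, buildroot_id, artifact):
    """Record that artifact was built in the buildroot with buildroot_id."""
    self._buildroot_to_artifact.setdefault(buildroot_id, []).append(artifact)
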
def claim_file(self, base_dir, path_in_base_dir):
    """
    Claim (delete) a file in the base directory.

    This method is used by analyzers to claim a file they've identified.
    All directories are silently ignored.

    :param str base_dir: the base directory to claim a file from
    :param str path_in_base_dir: the path to the file in the base directory to claim
    :raises RuntimeError: when the path to the base_dir is not a directory
    """
    if path_in_base_dir == '/':
        return

    file_path = path_in_base_dir.lstrip('/')
    if not os.path.isdir(base_dir):
        raise RuntimeError(f'The path "{base_dir}" is not a directory')

    abs_base_dir = os.path.abspath(base_dir)

    def _resolve_path(target):
        """Resolve the first symbolic link in the path recursively."""
        current_path = target
        # Crawl upwards starting at the target until the base directory is reached
        while current_path != abs_base_dir:
            if os.path.islink(current_path):
                # Get the absolute path of the link's target but strip the starting slash
                link_target = os.path.abspath(os.readlink(current_path))[1:]
                # Find the path after the link, for instance, if the link is
                # `/opt/rh/httpd24/root/etc/httpd` => `/etc/httpd`, and the passed in target is
                # `/opt/rh/httpd24/root/etc/httpd/httpd.conf`, then we just want `httpd.conf`.
                path_after_link = os.path.relpath(target, current_path)
                # The resolved path for the above example would be the base directory plus
                # `etc/httpd/httpd.conf`
                resolved_path = os.path.join(abs_base_dir, link_target, path_after_link)
                # In case there is more than one link in the path, call this closure again
                return _resolve_path(resolved_path)
            current_path = os.path.dirname(current_path)
        # No links were found, so just return the target
        return target

    resolved_path = _resolve_path(os.path.join(abs_base_dir, file_path))
    if os.path.isdir(resolved_path):
        log.debug(f'Ignoring "{resolved_path}" since directories don\'t get claimed')
    elif os.path.isfile(resolved_path):
        log.debug(f'Claiming file "{resolved_path}"')
        os.remove(resolved_path)

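# A runnable sketch (illustrative paths, separate from the class) of the
# situation _resolve_path handles: a symlink inside an unpacked layer points at
# an absolute path, which must be re-anchored under the layer directory rather
# than the host root.
import os
import tempfile

with tempfile.TemporaryDirectory() as base:
    os.makedirs(os.path.join(base, 'etc/httpd'))
    os.makedirs(os.path.join(base, 'opt/rh/httpd24/root/etc'))
    # The layer contains /opt/rh/httpd24/root/etc/httpd -> /etc/httpd
    link = os.path.join(base, 'opt/rh/httpd24/root/etc/httpd')
    os.symlink('/etc/httpd', link)
    # os.readlink returns the absolute target; stripping the leading slash lets
    # it be re-joined under the base directory, exactly as _resolve_path does.
    link_target = os.path.abspath(os.readlink(link))[1:]
    assert os.path.join(base, link_target) == os.path.join(base, 'etc/httpd')
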
def run(self):
    """
    Do the actual processing.

    :raises AnalysisFailure: if the analyzer completed with errors
    """
    build_info = self.read_metadata_file(self.BUILD_FILE)

    # Construct the Build object
    build = content.Build.get_or_create({
        'id_': build_info['id'],
        'type_': build_info['type'],
    })[0]

    if build_info['type'] not in self.SUPPORTED_BUILD_TYPES:
        return

    # Construct the component
    component, canonical_version = self._construct_and_save_component(
        build_info['type'], build_info)

    # Construct the local SourceLocation
    build_source = build_info['source']
    local_source_location = self.create_or_update_source_location(
        build_source, component, canonical_version)
    self.conditional_connect(build.source_location, local_source_location)

    # Record the rpms associated with this build
    rpms_info = self.read_metadata_file(self.RPM_FILE)
    for rpm_info in rpms_info:
        buildroot_id = rpm_info['buildroot_id']
        rpm = self.create_or_update_rpm_artifact_from_rpm_info(rpm_info)
        self.conditional_connect(rpm.build, build)
        self._map_buildroot_to_artifact(buildroot_id, rpm)

    # Record the artifacts
    archives_info = self.read_metadata_file(self.ARCHIVE_FILE)
    for archive_info in archives_info:
        if archive_info['btype'] == 'log':
            # No one cares about logs
            continue
        log.debug('Creating build artifact %s', archive_info['id'])
        archive = self.create_or_update_archive_artifact_from_archive_info(archive_info)
        self.conditional_connect(archive.build, build)
        self._map_buildroot_to_artifact(archive_info['buildroot_id'], archive)

    self._read_and_save_buildroots()

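# conditional_connect is not shown in this excerpt. A plausible sketch, inferred
# from its usage above and the is_connected() call seen elsewhere in this
# module; the real helper may also need special handling for cardinality-one
# relationships.
def conditional_connect(relationship, new_node):
    """Connect new_node through relationship unless it is already connected."""
    if not relationship.is_connected(new_node):
        relationship.connect(new_node)
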
def read_metadata_file(self, in_file):
    """
    Read and return the specified json metadata file or an empty dict.

    :param str in_file: The name of the input file to read. Probably one of the
        class constants.
    :return: a dict or list read from the file, or an empty dict
    :rtype: dict or list
    :raises ValueError: if the file was not valid json content
    """
    filename = os.path.join(self.input_dir, self.METADATA_DIR, in_file)
    if os.path.isfile(filename):
        with open(filename, 'r') as f:
            return json.load(f)
    else:
        log.debug('File not found: %s, returning empty dict', filename)
        return {}

def main(self, input_dir='/metadata'):
    """
    Call this to run the analyzer.

    :param str input_dir: The directory in which to find the files.
    """
    self.input_dir = input_dir
    neomodel.db.set_connection(config.DATABASE_URL)

    # Run the analyzer in a transaction
    neomodel.db.begin()
    try:
        self.run()
        log.debug('Analyzer completed successfully, committing.')
        neomodel.db.commit()
    except Exception:
        log.exception('Error encountered executing Analyzer, rolling back transaction.')
        neomodel.db.rollback()
        raise

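# A stubbed illustration (no real database) of the commit-or-rollback contract
# main() relies on: commit only happens if the work raises nothing, otherwise
# the transaction is rolled back before the exception is handled further up.
class _FakeDB:
    def __init__(self):
        self.committed = False
        self.rolled_back = False

    def begin(self):
        pass

    def commit(self):
        self.committed = True

    def rollback(self):
        self.rolled_back = True


def _failing_work():
    raise RuntimeError('boom')


db = _FakeDB()
try:
    db.begin()
    _failing_work()
    db.commit()
except RuntimeError:
    db.rollback()

assert db.rolled_back and not db.committed
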
def run(self):
    """
    Start the container RPM analyzer.

    :raises AnalysisFailure: if the analyzer completed with errors
    """
    build_info = self.read_metadata_file(self.BUILD_FILE)
    build_id = build_info['id']

    if build_info['type'] != self.CONTAINER_BUILD_TYPE:
        log.info(f'Skipping build {build_id} because the build is not a container')
        return

    # Create a mapping of arch to archive (container image) so we can easily map to the
    # parent container archives in a future loop
    arch_to_archive = {}
    not_container_msg = 'Skipping archive {0} since it\'s not a container image'
    for archive in self.read_metadata_file(self.ARCHIVE_FILE):
        if not self.is_container_archive(archive):
            log.debug(not_container_msg.format(archive['id']))
            continue
        arch = archive['extra']['image']['arch']
        if arch in arch_to_archive:
            log.error(
                f'Build {build_id} has more than one container image with the arch {arch}')
            continue
        arch_to_archive[arch] = archive

    parent_build_id = build_info['extra']['image'].get('parent_build_id')
    # If there is a parent to this image, then only get the RPMs installed in this layer
    # and mark them as embedded artifacts on this container image
    if parent_build_id is not None:
        # Find the RPMs installed in this layer versus the parent image
        for archive in self.koji_session.listArchives(parent_build_id):
            if not self.is_container_archive(archive):
                log.debug(not_container_msg.format(archive['id']))
                continue
            arch = archive['extra']['image']['arch']
            if arch not in arch_to_archive:
                log.debug(
                    f'The parent build {parent_build_id} contains an extra arch of {arch}')
                continue

            rpms = self._get_rpms_diff(archive['id'], arch_to_archive[arch]['id'])
            self._process_embedded_rpms(arch_to_archive[arch], rpms)
    # If there is no parent, then this is a base image. Just get all the RPMs installed in
    # the image and mark them as embedded artifacts in this container image.
    else:
        image_rpm_file = self.read_metadata_file(self.IMAGE_RPM_FILE)
        for archive in arch_to_archive.values():
            rpms = image_rpm_file.get(str(archive['id']))
            self._process_embedded_rpms(archive, rpms)

    # Claim all files from installed RPMs.
    self._claim_rpm_files(arch_to_archive.values())

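# _get_rpms_diff is not shown in this excerpt. A hypothetical sketch of the
# layer diff it performs, assuming RPM listings keyed by stringified archive ID
# (as in IMAGE_RPM_FILE above) and RPM dicts carrying a unique 'id' field; the
# real method may instead query Koji directly.
def get_rpms_diff(image_rpm_file, parent_archive_id, archive_id):
    """Return the RPMs in the image that are not present in its parent image."""
    parent_rpm_ids = {
        rpm['id'] for rpm in image_rpm_file.get(str(parent_archive_id), [])
    }
    return [
        rpm for rpm in image_rpm_file.get(str(archive_id), [])
        if rpm['id'] not in parent_rpm_ids
    ]
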
def _create_or_update_parent(self, build_id):
    """Create or update a parent build and its archives (container images).

    :param build_id: build ID of the parent build to process
    :return: dictionary of container image artifacts indexed by architectures
    :rtype: dict
    """
    parent_build = content.Build.get_or_create({
        'id_': build_id,
        'type_': 'buildContainer',
    })[0]

    archives = self.koji_session.listArchives(build_id)
    arch_to_artifact = {}
    not_container_msg = 'Skipping archive {0} since it\'s not a container image'
    for archive in archives:
        if archive['btype'] != 'image':
            log.debug(not_container_msg.format(archive['id']))
            continue
        architecture = archive['extra']['image']['arch']
        if architecture in arch_to_artifact:
            log.error(
                f'Build {build_id} has more than one container image with the arch '
                f'{architecture}')
            continue

        # Create or get the archive artifact that is the product of this build
        artifact = self.create_or_update_archive_artifact_from_archive_info(archive)
        arch_to_artifact[artifact.architecture] = artifact
        # If an archive was created in the previous step, connect it to this build.
        if not artifact.build.is_connected(parent_build):
            artifact.build.connect(parent_build)

    return arch_to_artifact

def run(self):
    """Do the actual processing."""
    build_info = self.read_metadata_file(self.BUILD_FILE)
    task_info = self.read_metadata_file(self.TASK_FILE)

    build_type = None
    if task_info:
        build_type = task_info['method']

    # Construct the component
    component, canonical_version = self._construct_and_save_component(
        build_type, build_info)

    # Construct the SourceLocation
    source = build_info['source']
    source_location = self.create_or_update_source_location(source, canonical_version)
    source_location.component.connect(component)

    # Construct the build object
    build = content.Build.get_or_create({
        'id_': build_info['id'],
        'type_': build_type,
    })[0]
    build.source_location.connect(source_location)

    # Record the rpms associated with this build
    rpms_info = self.read_metadata_file(self.RPM_FILE)
    for rpm_info in rpms_info:
        buildroot_id = rpm_info['buildroot_id']
        rpm = self.create_or_update_rpm_artifact_from_rpm_info(rpm_info)
        rpm.build.connect(build)
        self._map_buildroot_to_artifact(buildroot_id, rpm)

    # Record the artifacts
    archives_info = self.read_metadata_file(self.ARCHIVE_FILE)
    images_rpm_info = self.read_metadata_file(self.IMAGE_RPM_FILE)
    for archive_info in archives_info:
        if archive_info['btype'] == 'log':
            # No one cares about logs
            continue

        log.debug('Creating build artifact %s', archive_info['id'])
        aid = archive_info['id']
        atype = archive_info['btype']
        checksum = archive_info['checksum']
        filename = archive_info['filename']
        buildroot_id = archive_info['buildroot_id']

        # Find the nested arch information or set noarch. Note that 'extra' can exist
        # and be set to None in real data, so you can't chain all the gets.
        extra = archive_info.get('extra', {})
        if extra:
            arch = extra.get('image', {}).get('arch', 'noarch')
        else:
            arch = 'noarch'

        archive = self.create_or_update_archive_artifact(
            aid, filename, arch, atype, checksum)
        archive.build.connect(build)
        self._map_buildroot_to_artifact(buildroot_id, archive)

        # JSON object keys are strings, so cast the archive ID for the lookup
        if str(aid) in images_rpm_info:
            # It's an image and we know it contains some rpms. Save them.
            for rpm_info in images_rpm_info[str(aid)]:
                rpm = self.create_or_update_rpm_artifact_from_rpm_info(rpm_info)
                archive.embedded_artifacts.connect(rpm)

    self._read_and_save_buildroots()

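# The 'extra' caveat above, as a runnable illustration with made-up data: when
# the key exists but is set to None, dict.get returns None rather than the
# default, so a chained extra.get('image', {}).get('arch') would raise
# AttributeError.
archive_with_null_extra = {'extra': None}
extra = archive_with_null_extra.get('extra', {})
assert extra is None
arch = extra.get('image', {}).get('arch', 'noarch') if extra else 'noarch'
assert arch == 'noarch'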