Example #1
    def _claim_go_executables(self):
        """Claim executables identified by goversion."""
        not_container_msg = 'Skipping archive {0} since it\'s not a container image'
        archives = self.read_metadata_file(self.ARCHIVE_FILE)
        for index, archive in enumerate(archives):
            if not self.is_container_archive(archive):
                log.debug(not_container_msg.format(archive['id']))
                continue

            layer_dir = os.path.join(self.input_dir,
                                     self.UNPACKED_CONTAINER_LAYER_DIR,
                                     archive['filename'])

            cmd = [self.GOVERSION, '.']
            log.info(f'Running {cmd}')
            gv = subprocess.Popen(cmd,
                                  cwd=layer_dir,
                                  universal_newlines=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
            stdout, stderr = gv.communicate()
            # communicate() already waits for the process to exit
            if gv.returncode:
                raise RuntimeError(
                    f'The command "{" ".join(cmd)}" failed with: {stderr}')

            for line in stdout.splitlines():
                path, _ = line.split(' ', 1)
                log.info(
                    f'(archive {index+1}/{len(archives)}) Claiming {path}')
                self.claim_container_file(archive, path)
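goversion, as invoked above, emits one line per Go executable it finds, in the shape "<path> <version>" that the parsing loop relies on. A minimal, self-contained sketch of that parsing (the sample output is invented for illustration):

    # Hypothetical goversion output: one "<path> <version>" pair per line.
    sample_stdout = (
        '/usr/bin/app go1.12.5\n'
        '/usr/local/bin/helper go1.11\n'
    )
    for line in sample_stdout.splitlines():
        # Only the path matters for claiming; the Go version is discarded.
        path, _ = line.split(' ', 1)
        print(f'Would claim {path}')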
Example #2
    def _get_import_path_override(self, srcdir):
        """Look inside the Dockerfile for a named label.

        :param srcdir: path to source code to examine
        :return: import path override, or None
        :rtype: str or None
        """
        label = 'io.openshift.source-repo-url'
        try:
            df = DockerfileParser(srcdir, cache_content=True)
        except IOError:
            log.exception('Unable to read Dockerfile')
            return None

        try:
            repo = df.labels[label]
        except KeyError:
            log.debug(f'No {label} label in Dockerfile')
            return None
        except Exception:
            log.exception('Failed to process Dockerfile; ignoring')
            return None

        # Convert it to an import path by stripping off the scheme.
        (_, _, import_path) = repo.rpartition('://')
        if not import_path:
            return None

        return import_path
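The rpartition('://') call strips the scheme while tolerating values that lack one: when the separator is missing, rpartition returns the whole string as the last element, so scheme-less values pass through unchanged and only an empty label yields None. A quick illustration:

    # Scheme present: everything after '://' is kept.
    assert 'https://github.com/org/repo'.rpartition('://')[2] == 'github.com/org/repo'
    # No scheme: rpartition returns ('', '', value), so the value passes through.
    assert 'github.com/org/repo'.rpartition('://')[2] == 'github.com/org/repo'
    # Empty label: the import path is empty, and the method returns None.
    assert ''.rpartition('://')[2] == ''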
Example #3
    def _read_and_save_buildroots(self):
        """Save and link the rpms used in the buildroot for each artifact."""
        buildroots_info = self.read_metadata_file(self.BUILDROOT_FILE)
        for buildroot_id, buildroot_info in buildroots_info.items():
            log.debug('Creating artifacts for buildroot %s', buildroot_id)
            for rpm_info in buildroot_info:
                rpm = self.create_or_update_rpm_artifact_from_rpm_info(rpm_info)
                if buildroot_id not in self._buildroot_to_artifact:
                    continue
                for artifact in self._buildroot_to_artifact[buildroot_id]:
                    artifact.buildroot_artifacts.connect(rpm)
Example #4
    def claim_file(self, base_dir, path_in_base_dir):
        """
        Claim (delete) a file in the base directory.

        This method is used by analyzers to claim a file they've identified. Directories are
        never claimed; they are skipped with a debug log message.

        :param str base_dir: the base directory to claim a file from
        :param str path_in_base_dir: the path to the file in the base directory to claim
        :raises RuntimeError: when the path to the base_dir is not a directory
        """
        if path_in_base_dir == '/':
            return

        file_path = path_in_base_dir.lstrip('/')

        if not os.path.isdir(base_dir):
            raise RuntimeError(f'The path "{base_dir}" is not a directory')

        abs_base_dir = os.path.abspath(base_dir)

        def _resolve_path(target):
            """Resolve the first symbolic link in the path recursively."""
            current_path = target
            # Crawl upwards starting at the target until the base directory is reached
            while current_path != abs_base_dir:
                if os.path.islink(current_path):
                    # Get the absolute path of the link's target but strip the starting slash
                    link_target = os.path.abspath(
                        os.readlink(current_path))[1:]
                    # Find the path after the link, for instance, if the link is
                    # `/opt/rh/httpd24/root/etc/httpd` => `/etc/httpd`, and the passed in target is
                    # `/opt/rh/httpd24/root/etc/httpd/httpd.conf`, then we just want `httpd.conf`.
                    path_after_link = os.path.relpath(target, current_path)
                    # The resolved path for the above example would be the base directory plus
                    # `etc/httpd/httpd.conf`
                    resolved_path = os.path.join(abs_base_dir, link_target,
                                                 path_after_link)
                    # In case there is more than one link in the path, call this closure again
                    return _resolve_path(resolved_path)
                current_path = os.path.dirname(current_path)
            # No links were found, so just return the target
            return target

        resolved_path = _resolve_path(os.path.join(abs_base_dir, file_path))
        if os.path.isdir(resolved_path):
            log.debug(
                f'Ignoring "{resolved_path}" since directories don\'t get claimed'
            )
        elif os.path.isfile(resolved_path):
            log.debug(f'Claiming file "{resolved_path}"')
            os.remove(resolved_path)
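The symlink resolution in _resolve_path is easiest to see with a concrete layout. A minimal sketch that recreates the docstring's example on disk (the paths are invented, and "analyzer" stands in for whatever object provides claim_file):

    import os
    import tempfile

    base = tempfile.mkdtemp()
    # The real file lives at <base>/etc/httpd/httpd.conf ...
    os.makedirs(os.path.join(base, 'etc/httpd'))
    open(os.path.join(base, 'etc/httpd/httpd.conf'), 'w').close()
    # ... and <base>/opt/rh/httpd24/root/etc/httpd is a symlink to /etc/httpd.
    os.makedirs(os.path.join(base, 'opt/rh/httpd24/root/etc'))
    os.symlink('/etc/httpd', os.path.join(base, 'opt/rh/httpd24/root/etc/httpd'))

    # Claiming through the link removes the real file under the base directory,
    # never anything outside of it:
    # analyzer.claim_file(base, '/opt/rh/httpd24/root/etc/httpd/httpd.conf')
    # assert not os.path.exists(os.path.join(base, 'etc/httpd/httpd.conf'))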
Example #5
    def run(self):
        """
        Do the actual processing.

        :raises AnalysisFailure: if the analyzer completed with errors
        """
        build_info = self.read_metadata_file(self.BUILD_FILE)

        # Construct the Build object
        build = content.Build.get_or_create({
            'id_': build_info['id'],
            'type_': build_info['type']})[0]

        if build_info['type'] not in self.SUPPORTED_BUILD_TYPES:
            return

        # Construct the component
        component, canonical_version = self._construct_and_save_component(
            build_info['type'], build_info)

        # Construct the local SourceLocation
        build_source = build_info['source']
        local_source_location = self.create_or_update_source_location(
            build_source, component, canonical_version)

        self.conditional_connect(build.source_location, local_source_location)

        # Record the rpms associated with this build
        rpms_info = self.read_metadata_file(self.RPM_FILE)
        for rpm_info in rpms_info:
            buildroot_id = rpm_info['buildroot_id']
            rpm = self.create_or_update_rpm_artifact_from_rpm_info(rpm_info)
            self.conditional_connect(rpm.build, build)
            self._map_buildroot_to_artifact(buildroot_id, rpm)

        # Record the artifacts
        archives_info = self.read_metadata_file(self.ARCHIVE_FILE)
        for archive_info in archives_info:
            if archive_info['btype'] == 'log':
                # No one cares about logs
                continue

            log.debug('Creating build artifact %s', archive_info['id'])
            archive = self.create_or_update_archive_artifact_from_archive_info(archive_info)
            self.conditional_connect(archive.build, build)
            self._map_buildroot_to_artifact(archive_info['buildroot_id'], archive)

        self._read_and_save_buildroots()
Example #6
    def read_metadata_file(self, in_file):
        """
        Read and return the specified json metadata file or an empty dict.

        :param str in_file: The name of the input file to read. Probably one of the class constants.
        :return: a dict or list read from the file, or an empty dict
        :rtype: dict or list
        :raises ValueError: if the file was not valid json content
        """
        filename = os.path.join(self.input_dir, self.METADATA_DIR, in_file)
        if os.path.isfile(filename):
            with open(filename, 'r') as f:
                return json.load(f)
        else:
            log.debug('File not found: %s, returning empty dict', filename)
            return {}
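Note that json.load returns whatever the top-level JSON value is, which is why the docstring advertises "a dict or list"; callers must be prepared for either. A quick illustration:

    import json

    # A metadata file holding a JSON object yields a dict ...
    assert json.loads('{"id": 1}') == {'id': 1}
    # ... while one holding a JSON array yields a list.
    assert json.loads('[{"id": 1}]') == [{'id': 1}]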
Example #7
    def main(self, input_dir='/metadata'):
        """
        Call this to run the analyzer.

        :param str input_dir: The directory in which to find the files.
        """
        self.input_dir = input_dir
        neomodel.db.set_connection(config.DATABASE_URL)
        # run the analyzer in a transaction
        neomodel.db.begin()
        try:
            self.run()
            log.debug('Analyzer completed successfully, committing.')
            neomodel.db.commit()
        except Exception:
            log.exception(
                'Error encountered executing Analyzer, rolling back transaction.'
            )
            neomodel.db.rollback()
            raise
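The begin/commit/rollback shape used here is a general pattern that can be factored into a context manager. A minimal sketch, independent of this codebase (db stands in for neomodel.db; recent neomodel versions also ship a comparable db.transaction helper):

    from contextlib import contextmanager

    @contextmanager
    def transaction(db):
        """Commit the block on success; roll back and re-raise on any error."""
        db.begin()
        try:
            yield
            db.commit()
        except Exception:
            db.rollback()
            raise

With such a helper, the body of main() collapses to a "with transaction(neomodel.db): self.run()" block.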
Example #8
    def run(self):
        """
        Start the container RPM analyzer.

        :raises AnalysisFailure: if the analyzer completed with errors
        """
        build_info = self.read_metadata_file(self.BUILD_FILE)
        build_id = build_info['id']

        if build_info['type'] != self.CONTAINER_BUILD_TYPE:
            log.info(
                f'Skipping build {build_id} because the build is not a container'
            )
            return

        # Create a mapping of arch to archive (container image) so we can easily map to the
        # parent container archives in a future loop
        arch_to_archive = {}
        not_container_msg = 'Skipping archive {0} since it\'s not a container image'
        for archive in self.read_metadata_file(self.ARCHIVE_FILE):
            if not self.is_container_archive(archive):
                log.debug(not_container_msg.format(archive['id']))
                continue
            arch = archive['extra']['image']['arch']
            if arch in arch_to_archive:
                log.error(
                    f'Build {build_id} has more than one container image with the arch {arch}'
                )
                continue
            arch_to_archive[arch] = archive

        parent_build_id = build_info['extra']['image'].get('parent_build_id')
        # If there is a parent to this image, then only get the RPMs installed in this layer
        # and mark them as embedded artifacts on this container image
        if parent_build_id is not None:
            # Find the RPMs installed in this layer versus the parent image
            for archive in self.koji_session.listArchives(parent_build_id):
                if not self.is_container_archive(archive):
                    log.debug(not_container_msg.format(archive['id']))
                    continue
                arch = archive['extra']['image']['arch']
                if arch not in arch_to_archive:
                    log.debug(
                        f'The parent build {parent_build_id} contains an extra arch of {arch}'
                    )
                    continue

                rpms = self._get_rpms_diff(archive['id'],
                                           arch_to_archive[arch]['id'])
                self._process_embedded_rpms(arch_to_archive[arch], rpms)
        # If there is no parent, then this is a base image. Just get all the RPMs installed in
        # the image and mark them as embedded artifacts in this container image.
        else:
            image_rpm_file = self.read_metadata_file(self.IMAGE_RPM_FILE)
            for archive in arch_to_archive.values():
                rpms = image_rpm_file.get(str(archive['id']))
                self._process_embedded_rpms(archive, rpms)

        # Claim all files from installed RPMs.
        self._claim_rpm_files(arch_to_archive.values())
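_get_rpms_diff is not shown in this example; conceptually it returns the RPMs present in the child image archive but absent from the parent's archive of the same arch. A purely illustrative sketch of that set difference, assuming both images can be queried with Koji's listRPMs (this is not the project's actual implementation):

    def _get_rpms_diff(self, parent_archive_id, archive_id):
        """Return the RPMs in this image but not in its parent (sketch)."""
        parent_rpm_ids = {
            rpm['id']
            for rpm in self.koji_session.listRPMs(imageID=parent_archive_id)
        }
        return [
            rpm for rpm in self.koji_session.listRPMs(imageID=archive_id)
            if rpm['id'] not in parent_rpm_ids
        ]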
Example #9
    def _create_or_update_parent(self, build_id):
        """Create or update a parent build and its archives (container images).

        :param build_id: build ID of the parent build to process
        :return: dictionary of container image artifacts indexed by architectures
        :rtype: dict
        """
        parent_build = content.Build.get_or_create({
            'id_': build_id,
            'type_': 'buildContainer',
        })[0]

        archives = self.koji_session.listArchives(build_id)
        arch_to_artifact = {}
        not_container_msg = 'Skipping archive {0} since it\'s not a container image'

        for archive in archives:
            if archive['btype'] != 'image':
                log.debug(not_container_msg.format(archive['id']))
                continue

            architecture = archive['extra']['image']['arch']
            if architecture in arch_to_artifact:
                log.error(
                    f'Build {build_id} has more than one container image with the arch '
                    f'{architecture}')
                continue

            # Create or get the archive artifact that is the product of this build
            artifact = self.create_or_update_archive_artifact_from_archive_info(
                archive)
            arch_to_artifact[artifact.architecture] = artifact

            # Connect the artifact to this build unless it's already connected.
            if not artifact.build.is_connected(parent_build):
                artifact.build.connect(parent_build)

        return arch_to_artifact
Example #10
    def run(self):
        """Do the actual processing."""
        build_info = self.read_metadata_file(self.BUILD_FILE)
        task_info = self.read_metadata_file(self.TASK_FILE)

        build_type = None
        if task_info:
            build_type = task_info['method']

        # construct the component
        component, canonical_version = self._construct_and_save_component(
            build_type, build_info)

        # construct the SourceLocation
        source = build_info['source']
        source_location = self.create_or_update_source_location(
            source, canonical_version)
        source_location.component.connect(component)

        # construct the build object
        build = content.Build.get_or_create({
            'id_': build_info['id'],
            'type_': build_type
        })[0]
        build.source_location.connect(source_location)

        # record the rpms associated with this build
        rpms_info = self.read_metadata_file(self.RPM_FILE)
        for rpm_info in rpms_info:
            buildroot_id = rpm_info['buildroot_id']
            rpm = self.create_or_update_rpm_artifact_from_rpm_info(rpm_info)
            rpm.build.connect(build)
            self._map_buildroot_to_artifact(buildroot_id, rpm)

        # record the artifacts
        archives_info = self.read_metadata_file(self.ARCHIVE_FILE)
        images_rpm_info = self.read_metadata_file(self.IMAGE_RPM_FILE)
        for archive_info in archives_info:
            if archive_info['btype'] == 'log':
                # No one cares about logs
                continue

            log.debug('Creating build artifact %s', archive_info['id'])
            aid = archive_info['id']
            atype = archive_info['btype']
            checksum = archive_info['checksum']
            filename = archive_info['filename']
            buildroot_id = archive_info['buildroot_id']
            # Find the nested arch information or set noarch. Note that 'extra' can exist
            # and be set to None in real data, so you can't chain all the gets.
            extra = archive_info.get('extra', {})
            if extra:
                arch = extra.get('image', {}).get('arch', 'noarch')
            else:
                arch = 'noarch'

            archive = self.create_or_update_archive_artifact(
                aid, filename, arch, atype, checksum)
            archive.build.connect(build)
            self._map_buildroot_to_artifact(buildroot_id, archive)

            if aid in images_rpm_info:
                # It's an image and we know it contains some rpms. Save them.
                for rpm_info in images_rpm_info[aid]:
                    rpm = self.create_or_update_rpm_artifact_from_rpm_info(
                        rpm_info)
                    archive.embedded_artifacts.connect(rpm)

        self._read_and_save_buildroots()
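The comment about 'extra' deserves a concrete demonstration: dict.get's default only applies when the key is absent, not when it is present with a None value, so chaining the gets raises AttributeError on real data. A minimal illustration:

    archive_info = {'extra': None}
    # The key exists, so the {} default is NOT used and extra is None ...
    extra = archive_info.get('extra', {})
    assert extra is None
    # ... which means extra.get('image', {}) would raise AttributeError.
    arch = extra.get('image', {}).get('arch', 'noarch') if extra else 'noarch'
    assert arch == 'noarch'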