    def _run_analyzer(self, command, json_output=True):
        """Run command (analyzer), if a JSON output is expected, parse it

        :param command: command to be run (command with argument vector as array)
        :param json_output: True if output should be parsed
        :return: status, output, error triplet
        """
        self.log.debug("Executing command, timeout={timeout}: {cmd}".format(
            timeout=self._CLI_TIMEOUT, cmd=command))
        cmd = TimedCommand(command)
        status, output, error = cmd.run(timeout=self._CLI_TIMEOUT)
        self.log.debug("status: %d, output: %s, error: %s", status, output,
                       error)

        if status != 0:
            self.log.warning(
                "Executing command failed, return value: %d, stderr: '%s' ",
                status, error)

        # Some tools (e.g. complexity-report) emit escaped null bytes in their
        # output (propagated from sources such as npm/glob/7.0.3). PostgreSQL
        # cannot store null bytes in results, so replace the escape sequence
        # here to be safe.
        output = [line.replace('\\u0000', '\\\\0') for line in output]

        if json_output:
            if output:
                output = "".join(output)
                output = json.loads(output)
            else:
                output = {}

        return status, output, error
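
For reference, the sanitize-then-parse pattern above can be exercised standalone. This is a minimal sketch, with a plain subprocess call standing in for TimedCommand (which is not shown in this snippet); run_json_tool is an illustrative name, not part of the original code.

import json
import subprocess

def run_json_tool(command, timeout=300):
    # Run a CLI tool and capture stdout/stderr as text; raises on timeout.
    proc = subprocess.run(command, capture_output=True, text=True,
                          timeout=timeout)
    # Same sanitization as _run_analyzer: PostgreSQL cannot store null bytes,
    # so replace the escaped \u0000 sequence before persisting the result.
    sanitized = proc.stdout.replace('\\u0000', '\\\\0')
    output = json.loads(sanitized) if sanitized.strip() else {}
    return proc.returncode, output, proc.stderr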
Example #2
    def run_mercator(self,
                     arguments,
                     cache_path,
                     keep_path=False,
                     outermost_only=True,
                     timeout=300):
        result_data = {'status': 'unknown', 'summary': [], 'details': []}

        mercator_target = arguments.get('cache_sources_path', cache_path)
        tc = TimedCommand(['mercator', mercator_target])
        status, data, err = tc.run(
            timeout=timeout,
            is_json=True,
            update_env={'MERCATOR_JAVA_RESOLVE_POMS': 'true'})
        if status != 0:
            self.log.error(err)
            result_data['status'] = 'error'
            return result_data
        ecosystem_object = self.storage.get_ecosystem(arguments['ecosystem'])
        if ecosystem_object.is_backed_by(EcosystemBackend.pypi):
            # TODO: attempt static setup.py parsing with mercator
            items = [self._merge_python_items(mercator_target, data)]
        else:
            if outermost_only:
                # process only root level manifests (or the ones closest to the root level)
                items = self._data_normalizer.get_outermost_items(
                    data.get('items') or [])
            else:
                items = data.get('items') or []
            self.log.debug('mercator found %i projects, outermost %i',
                           len(data.get('items') or []), len(items))

            if ecosystem_object.is_backed_by(EcosystemBackend.maven):
                # For Maven we download both the JAR and the POM. We consider
                # the POM to be *the* source of information and don't want to
                # duplicate it by also including the POM bundled inside the
                # artifact (assuming it's included there at all).
                items = [
                    item for item in items
                    if item['ecosystem'].lower() == 'java-pom'
                ]
        result_data['details'] = [
            self._data_normalizer.handle_data(item, keep_path=keep_path)
            for item in items
        ]

        result_data['status'] = 'success'
        return result_data
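
The _data_normalizer.get_outermost_items helper is not shown above. A minimal sketch of the "closest to the root" selection it performs, assuming each mercator item carries a 'path' key (an assumption, not confirmed by this snippet):

import os

def get_outermost_items(items):
    # Keep only the manifests at the shallowest directory depth, i.e. the
    # root-level ones, or the ones closest to the root when none sit at it.
    if not items:
        return []

    def depth(item):
        return item['path'].rstrip(os.sep).count(os.sep)

    min_depth = min(depth(item) for item in items)
    return [item for item in items if depth(item) == min_depth]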
Example #3
    def execute(self, arguments):
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        result_data = {'status': 'unknown', 'summary': [], 'details': {}}

        if self._is_valid_ecosystem(arguments['ecosystem']):
            hub = self._get_hub()

            # BlackDuck projects have no notion of ecosystem, so we need to
            # namespace the project names ourselves: for package `crumb` in the
            # npm ecosystem we end up with the name `npm-crumb`
            project = self._get_project_name(arguments)
            version = arguments['version']

            # Check whether the given project has already been scanned
            data = self._release_data(hub, project, version)

            if not data and self._allow_cli_scan:
                self.log.debug("No data available for project {p} {v}".format(
                    p=project, v=version))
                # No data available, issue a new scan and re-query release data
                source_tarball_path = ObjectCache.get_from_dict(
                    arguments).get_source_tarball()
                command = self._prepare_command(project, version,
                                                source_tarball_path)
                self.log.debug(
                    "Executing command, timeout={timeout}: {cmd}".format(
                        timeout=self._BLACKDUCK_CLI_TIMEOUT, cmd=command))
                bd = TimedCommand(command)
                status, output, error = bd.run(
                    timeout=self._BLACKDUCK_CLI_TIMEOUT,
                    update_env={'BD_HUB_PASSWORD': config.blackduck_password})
                self.log.debug("status = %s, error = %s", status, error)
                self.log.debug("output = %s", output)
                data = self._release_data(hub, project, version)

            self.log.debug("Release data for project {p} {v}: {d}".format(
                p=project, v=version, d=data))
            result_data['details'] = data
            result_data['status'] = 'success' if data else 'error'
        else:
            result_data['status'] = 'error'

        return result_data
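
The _get_project_name helper is not shown above. Given the namespacing comment, a plausible sketch (hypothetical, not the actual implementation):

    def _get_project_name(self, arguments):
        # Hypothetical: prefix the package name with its ecosystem,
        # e.g. ecosystem 'npm' + name 'crumb' -> 'npm-crumb'
        return '{e}-{n}'.format(e=arguments['ecosystem'],
                                n=arguments['name'])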
Example #4
    def execute(self, arguments):
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        cache_path = ObjectCache.get_from_dict(arguments).get_source_tarball()

        results = []
        for path in get_all_files_from(cache_path, path_filter=skip_git_files):
            self.log.debug("path = %s", path)

            bw = TimedCommand(['binwalk', '-B', path])
            status, output, error = bw.run(timeout=60)
            self.log.debug("status = %s, error = %s", status, error)
            self.log.debug("output = %s", output)

            parsed_binwalk = self.parse_binwalk(output)
            results.append({
                "path": os.path.relpath(path, cache_path),
                "output": parsed_binwalk,
            })
        return {'summary': [], 'status': 'success', 'details': results}
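
The parse_binwalk method is not shown above. binwalk prints a table with DECIMAL, HEXADECIMAL, and DESCRIPTION columns; a minimal sketch that keeps only the data rows might look like this (illustrative only, assuming output is the list of lines returned by TimedCommand):

    def parse_binwalk(self, output):
        # Data rows start with a decimal offset; the header and separator
        # lines do not, so they are skipped.
        results = []
        for line in output:
            parts = line.split(None, 2)
            if len(parts) == 3 and parts[0].isdigit():
                offset, hex_offset, description = parts
                results.append({'offset': int(offset),
                                'hex_offset': hex_offset,
                                'description': description.strip()})
        return results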
    def execute(self, arguments):
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        eco = arguments['ecosystem']
        pkg = arguments['name']
        tool_responses = {}
        result_summary = {
            'package_names': [],
            'registered_srpms': [],
            'all_rhn_channels': [],
            'all_rhsm_content_sets': [],
            'all_rhsm_product_names': []
        }
        result_data = {'status': 'error',
                       'summary': result_summary,
                       'details': tool_responses
                       }

        # bail out early; unless we have access to internal services or the package
        # comes from the Maven ecosystem, we can't comment on downstream usage
        is_maven = Ecosystem.by_name(self.storage.session, eco).is_backed_by(EcosystemBackend.maven)
        if not self._is_inside_rh() and not is_maven:
            return result_data

        self.log.debug('Fetching {e}/{p} from Anitya'.format(e=eco, p=pkg))
        res = self._fetch_anitya_project(eco, pkg)
        anitya_rpm_names = []
        anitya_mvn_names = []
        if res is None:
            result_data['status'] = 'error'
        elif res.status_code == 200:
            self.log.debug('Retrieved {e}/{p} from Anitya'.format(e=eco, p=pkg))
            anitya_response = res.json()
            tool_responses['redhat_anitya'] = anitya_response
            # For now, we assume all downstreams are ones we care about
            for entry in anitya_response['packages']:
                if entry['distro'] == RH_RPM_DISTRO_NAME:
                    anitya_rpm_names.append(entry['package_name'])
                elif entry['distro'] == RH_MVN_DISTRO_NAME:
                    anitya_mvn_names.append(entry['package_name'])
                else:
                    self.log.warning(
                        'Unknown distro {d} for downstream package {o} (package {p}) in Anitya'
                        .format(d=entry['distro'], o=entry['package_name'], p=pkg))
            self.log.debug('Candidate RPM names from Anitya: {}'.format(anitya_rpm_names))
            self.log.debug('Candidate MVN names from Anitya: {}'.format(anitya_mvn_names))
            # TODO: Report 'partial' here and switch to 'success' at the end
            result_data['status'] = 'success'
        else:
            msg = 'Failed to find Anitya project {e}/{p}. Anitya response: {r}'
            self.log.error(msg.format(e=eco, p=pkg, r=res.text))
            result_data['status'] = 'error'

        if self._is_inside_rh():
            # we have candidate downstream name mappings, check them against Brew
            seed_names = anitya_rpm_names or [self._prefix_package_name(pkg, eco)]
            self.log.debug('Checking candidate names in Brew: {}'.format(seed_names))

            args = ['brew-utils-cli', '--version', arguments['version']]
            artifact_hash = self._get_artifact_hash(algorithm='sha256')
            if artifact_hash:
                args += ['--digest', artifact_hash]
            args += seed_names

            self.log.debug("Executing command, timeout={timeout}: {cmd}".format(timeout=self._BREWUTILS_CLI_TIMEOUT,
                                                                                cmd=args))
            tc = TimedCommand(args)
            status, output, error = tc.run(timeout=self._BREWUTILS_CLI_TIMEOUT)
            self.log.debug("status = %s, error = %s", status, error)
            output = ''.join(output)
            self.log.debug("output = %s", output)
            if not output:
                raise TaskError("Error running command %s" % args)
            brew = json.loads(output)

            result_summary['package_names'] = brew['packages']
            result_summary['registered_srpms'] = brew['response']['registered_srpms']
            tool_responses['brew'] = brew['response']['brew']

            # we have SRPM details, fetch details on where the RPMs are shipped
            tool_responses['pulp_cdn'] = pulp_responses = []
            rhn_channels = set()
            rhsm_content_sets = set()
            rhsm_product_names = set()
            for srpm_summary in result_summary['registered_srpms']:
                srpm_filename = "{n}-{v}-{r}.src.rpm".format(n=srpm_summary['package_name'],
                                                             v=srpm_summary['version'],
                                                             r=srpm_summary['release'])
                cdn_metadata = self._get_cdn_metadata(srpm_filename)
                if cdn_metadata is None:
                    msg = 'Error getting shipping data for {e}/{p} SRPM: {srpm}'
                    self.log.error(msg.format(e=eco, p=pkg, srpm=srpm_filename))
                    continue
                pulp_responses.append(cdn_metadata)
                srpm_summary['published_in'] = cdn_metadata['rhsm_product_names']
                rhn_channels.update(cdn_metadata['rhn_channels'])
                rhsm_content_sets.update(cdn_metadata['rhsm_content_sets'])
                rhsm_product_names.update(cdn_metadata['rhsm_product_names'])
            result_summary['all_rhn_channels'] = sorted(rhn_channels)
            result_summary['all_rhsm_content_sets'] = sorted(rhsm_content_sets)
            result_summary['all_rhsm_product_names'] = sorted(rhsm_product_names)

        self._add_mvn_results(result_summary, anitya_mvn_names, arguments['version'])

        return result_data
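
The shape of the brew-utils-cli JSON consumed above is not documented in this snippet, but it can be inferred from the keys the code accesses. An illustrative example only, with made-up values:

brew = {
    'packages': ['python-requests'],          # candidate downstream names
    'response': {
        'registered_srpms': [
            {'package_name': 'python-requests',
             'version': '2.6.0',
             'release': '1.el7'},
        ],
        'brew': {},                           # raw Brew response, stored verbatim
    },
}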