def _run_analyzer(self, command, json_output=True):
        """Run command (analyzer), if a JSON output is expected, parse it.

        :param command: command to be run (command with argument vector as array)
        :param json_output: True if output should be parsed
        :return: status, output, error triplet
        """
        self.log.debug("Executing command, timeout={timeout}: {cmd}".format(
            timeout=self._CLI_TIMEOUT, cmd=command))
        cmd = TimedCommand(command)
        status, output, error = cmd.run(timeout=self._CLI_TIMEOUT)
        self.log.debug("status: %d, output: %s, error: %s", status, output,
                       error)

        if status != 0:
            self.log.warning(
                "Executing command failed, return value: %d, stderr: '%s' ",
                status, error)

        # Some tools, such as complexity-report, write zero bytes to their
        # output (propagated from sources such as npm/glob/7.0.3). This caused
        # failures when pushing results to Postgres, as Postgres cannot store
        # null bytes. Let's be safe here.
        output = [line.replace('\u0000', '\\0') for line in output]

        if json_output:
            if output:
                output = "".join(output)
                output = json.loads(output)
            else:
                output = {}

        return status, output, error
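    # A minimal illustration (not part of the original task) of the null-byte
    # replacement above: a raw '\u0000' in tool output would be rejected by
    # Postgres, while the escaped form is stored safely.
    #
    #     >>> 'glob\u00007.0.3'.replace('\u0000', '\\0')
    #     'glob\\07.0.3'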
    def execute(self, arguments):
        """Task code.

        :param arguments: dictionary with task arguments
        :return: {}, results
        """
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        cache_path = ObjectCache.get_from_dict(arguments).get_source_tarball()

        results = []
        for path in get_all_files_from(cache_path, path_filter=skip_git_files):
            self.log.debug("path = %s", path)

            bw = TimedCommand(['binwalk', '-B', path])
            status, output, error = bw.run(timeout=60)
            self.log.debug("status = %s, error = %s", status, error)
            self.log.debug("output = %s", output)

            parsed_binwalk = self.parse_binwalk(output)
            results.append({
                "path": os.path.relpath(path, cache_path),
                "output": parsed_binwalk,
            })
        return {'summary': [], 'status': 'success', 'details': results}
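The parse_binwalk helper is not shown in this example. A minimal sketch, assuming binwalk's usual three-column signature table (a DECIMAL / HEXADECIMAL / DESCRIPTION header and a dashed separator, then one finding per row); the real implementation may differ:

    @staticmethod
    def parse_binwalk(output):
        """Sketch only: parse binwalk's signature-scan table."""
        findings = []
        for line in output:
            line = line.strip()
            # skip the header, the dashed separator and blank lines
            if not line or line.startswith(('DECIMAL', '-')):
                continue
            parts = line.split(None, 2)
            if len(parts) == 3 and parts[0].isdigit():
                findings.append({'offset': int(parts[0]),
                                 'description': parts[2]})
        return findings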
Example #3
    def run_timed_command(cmd, file):
        """Run timed command and write output to file.

        :param cmd: command to run
        :param file: output file
        :return:
        """
        timed_cmd = TimedCommand(cmd)
        status, output, _ = timed_cmd.run(timeout=3600)
        if status != 0 or not file.is_file():
            # all errors are in stdout, not stderr
            raise TaskError(output)

    def run_scancode(scan_path):
        """Run scancode tool."""
        result_data = {
            'status': 'unknown',
            'summary': {},
            'details': {},
            'command': 'N/A'
        }
        command = [
            path.join(configuration.SCANCODE_PATH, 'scancode'),
            # Scan for licenses
            '--license',
            # Do not return license matches with scores lower than this score
            '--license-score',
            configuration.SCANCODE_LICENSE_SCORE,
            # Files without findings are omitted
            '--only-findings',
            # Use n parallel processes
            '--processes',
            configuration.SCANCODE_PROCESSES,
            # Do not print summary or progress messages
            '--quiet',
            # Strip the root directory segment of all paths
            '--strip-root',
            # Stop scanning a file if scanning takes longer than a timeout in seconds
            '--timeout',
            configuration.SCANCODE_TIMEOUT,
            scan_path
        ]
        for ignore_pattern in configuration.SCANCODE_IGNORE:
            command += ['--ignore', '{}'.format(ignore_pattern)]
        with username():
            tc = TimedCommand(command)
            status, output, error = tc.run(is_json=True, timeout=1200)
            if status != 0:
                return {
                    "status": status,
                    "output": output,
                    "error": error,
                    "command": command
                }

        details = LicenseCheckTask.process_output(output)
        result_data['details'] = details
        result_data['status'] = 'success'
        result_data['summary'] = {
            'sure_licenses': list(details['licenses'].keys())
        }
        result_data['command'] = command

        return result_data
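A hedged usage sketch for run_scancode; the scan path below is hypothetical and the helper is assumed to be callable as a plain function (e.g. a staticmethod):

    result = run_scancode('/tmp/unpacked-sources')
    if result['status'] == 'success':
        print('Detected licenses:', result['summary']['sure_licenses'])
    else:
        print('scancode failed with status', result['status'])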
Example #5
    def execute(self, arguments):
        """Task code."""
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        result_data = {'status': 'unknown', 'summary': [], 'details': {}}

        if self._is_valid_ecosystem(arguments['ecosystem']):
            hub = self._get_hub()

            # BlackDuck projects don't have a notion of ecosystem, so we
            # namespace the project names ourselves: for the package `crumb`
            # in the npm ecosystem we end up with the name `npm-crumb`
            project = self._get_project_name(arguments)
            version = arguments['version']

            # Check if the given project has already been scanned
            data = self._release_data(hub, project, version)

            if not data and self._allow_cli_scan:
                self.log.debug("No data available for project {p} {v}".format(
                    p=project, v=version))
                # No data available, issue a new scan and re-query release data
                source_tarball_path = ObjectCache.get_from_dict(
                    arguments).get_source_tarball()
                command = self._prepare_command(project, version,
                                                source_tarball_path)
                self.log.debug(
                    "Executing command, timeout={timeout}: {cmd}".format(
                        timeout=self._BLACKDUCK_CLI_TIMEOUT, cmd=command))
                bd = TimedCommand(command)
                status, output, error = \
                    bd.run(timeout=self._BLACKDUCK_CLI_TIMEOUT,
                           update_env={'BD_HUB_PASSWORD': self.configuration.BLACKDUCK_PASSWORD})
                self.log.debug("status = %s, error = %s", status, error)
                self.log.debug("output = %s", output)
                data = self._release_data(hub, project, version)

            self.log.debug("Release data for project {p} {v}: {d}".format(
                p=project, v=version, d=data))
            result_data['details'] = data
            result_data['status'] = 'success' if data else 'error'
        else:
            result_data['status'] = 'error'

        return result_data
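The _get_project_name helper is not shown in this example. A minimal sketch consistent with the namespacing comment above (an assumption, not the original code):

    def _get_project_name(self, arguments):
        """Sketch only: namespace the BlackDuck project name by ecosystem.

        Per the comment above, package `crumb` in the npm ecosystem
        becomes `npm-crumb`.
        """
        return '{e}-{n}'.format(e=arguments['ecosystem'],
                                n=arguments['name'])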
    def run_gofedlib(self, topdir, timeout):
        """Run gofedlib-cli to extract dependencies from golang sources."""
        tc = TimedCommand([
            'gofedlib-cli', '--dependencies-main', '--dependencies-packages',
            '--dependencies-test', '--skip-errors', topdir
        ])
        status, data, err = tc.run(timeout=timeout)

        if status:
            raise FatalTaskError('gofedlib-cli failed: {err}'.format(err=err))

        result = json.loads(data[0])
        main_deps_count = len(result.get('deps-main', []))
        packages_count = len(result.get('deps-packages', []))
        self.log.debug('gofedlib found %i dependencies',
                       main_deps_count + packages_count)

        return [{'ecosystem': 'gofedlib', 'result': result}]
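    # Assumed shape of the gofedlib-cli JSON consumed above (a sketch inferred
    # from the keys this snippet reads, not from gofedlib documentation):
    #
    #     {"deps-main": ["github.com/foo/bar", ...],
    #      "deps-packages": [...],
    #      "deps-test": [...]}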
    def run_gofedlib(self, topdir, name, version, timeout):
        """Run gofedlib-cli to extract dependencies from golang sources."""
        tc = TimedCommand([
            'gofedlib-cli', '--dependencies-main', '--dependencies-packages',
            '--dependencies-test', '--skip-errors', topdir
        ])
        status, data, err = tc.run(timeout=timeout)
        result = json.loads(data[0])
        main_deps_count = len(result.get('deps-main', []))
        packages_count = len(result.get('deps-packages', []))
        self.log.debug('gofedlib found %i dependencies',
                       main_deps_count + packages_count)

        result['code_repository'] = {
            'type': 'git',
            'url': 'https://{name}'.format(name=name)
        }
        result['name'] = name
        result['version'] = version
        return [{'ecosystem': 'gofedlib', 'result': result}]

    def extract_dependencies(github_repo, github_sha):
        """Extract the dependencies information.

        Currently assumes the repository is a Maven repository.
        """
        with TemporaryDirectory() as workdir:
            repo = Git.clone(url=github_repo, path=workdir, timeout=3600)
            repo.reset(revision=github_sha, hard=True)
            with cwd(repo.repo_path):
                output_file = Path.cwd() / "dependency-tree.txt"
                cmd = ["mvn", "org.apache.maven.plugins:maven-dependency-plugin:3.0.2:tree",
                       "-DoutputType=dot",
                       "-DoutputFile={filename}".format(filename=output_file),
                       "-DappendOutput=true"]
                timed_cmd = TimedCommand(cmd)
                status, output, _ = timed_cmd.run(timeout=3600)
                if status != 0 or not output_file.is_file():
                    # all errors are in stdout, not stderr
                    raise TaskError(output)
                with output_file.open() as f:
                    return GithubDependencyTreeTask.parse_maven_dependency_tree(f.readlines())
Example #9
    def get_maven_dependencies():
        """Get direct and indirect dependencies from pom.xml by using maven dependency tree plugin.

        :return: set of direct and indirect dependencies
        """
        output_file = Path.cwd() / "dependency-tree.txt"
        cmd = [
            "mvn",
            "org.apache.maven.plugins:maven-dependency-plugin:3.0.2:tree",
            "-DoutputType=dot",
            "-DoutputFile={filename}".format(filename=output_file),
            "-DappendOutput=true"
        ]
        timed_cmd = TimedCommand(cmd)
        status, output, _ = timed_cmd.run(timeout=3600)
        if status != 0 or not output_file.is_file():
            # all errors are in stdout, not stderr
            raise TaskError(output)
        with output_file.open() as f:
            return GithubDependencyTreeTask.parse_maven_dependency_tree(
                f.readlines())
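The parse_maven_dependency_tree helper is not shown here. A minimal sketch, assuming the dot output format produced by the plugin ("g:a:type:version" -> "g:a:type:version:scope" edges inside a digraph block); the real parser may differ:

    @staticmethod
    def parse_maven_dependency_tree(lines):
        """Sketch only: collect the target of every dot-format edge."""
        import re  # local import keeps the sketch self-contained
        edge = re.compile(r'->\s*"([^"]+)"')
        dependencies = set()
        for line in lines:
            match = edge.search(line)
            if match:
                dependencies.add(match.group(1))
        return dependencies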
    def run_mercator(self,
                     arguments,
                     cache_path,
                     keep_path=False,
                     outermost_only=True,
                     timeout=300,
                     resolve_poms=True):
        """Run mercator tool."""
        # TODO: reduce cyclomatic complexity
        result_data = {'status': 'unknown', 'summary': [], 'details': []}
        mercator_target = arguments.get('cache_sources_path', cache_path)

        tc = TimedCommand(['mercator', mercator_target])
        update_env = {
            'MERCATOR_JAVA_RESOLVE_POMS': 'true'
        } if resolve_poms else {}
        status, data, err = tc.run(timeout=timeout,
                                   is_json=True,
                                   update_env=update_env)
        if status != 0:
            self.log.error(err)
            raise FatalTaskError(err)

        ecosystem_object = self.storage.get_ecosystem(arguments['ecosystem'])
        if ecosystem_object.is_backed_by(EcosystemBackend.pypi):
            # TODO: attempt static setup.py parsing with mercator
            items = [self._merge_python_items(mercator_target, data)]
            if items == [None]:
                raise NotABugFatalTaskError(
                    'Found no usable PKG-INFO/metadata.json/requirements.txt')
        else:
            if outermost_only:
                # process only root level manifests (or the ones closest to the root level)
                items = self._data_normalizer.get_outermost_items(
                    data.get('items') or [])
            else:
                items = data.get('items') or []
            self.log.debug('mercator found %i projects, outermost %i',
                           len(data.get('items') or []), len(items))

            if ecosystem_object.is_backed_by(EcosystemBackend.maven):
                # for maven we download both Jar and POM, we consider POM to be *the*
                #  source of information and don't want to duplicate info by including
                #  data from pom included in artifact (assuming it's included)
                items = [
                    d for d in items if d['ecosystem'].lower() == 'java-pom'
                ]
            elif ecosystem_object.is_backed_by(EcosystemBackend.npm):
                # ignore other metadata files, e.g. requirements.txt
                items = [d for d in items if d['ecosystem'].lower() == 'npm']
            elif arguments['ecosystem'] == 'go':
                items = [
                    d for d in items if d['ecosystem'].lower() == 'go-glide'
                ]
                if not items:
                    # Mercator found no Go Glide files, run gofedlib
                    items = self.run_gofedlib(topdir=mercator_target,
                                              name=arguments.get('name'),
                                              version=arguments.get('version'),
                                              timeout=timeout)

        result_data['details'] = [
            self._data_normalizer.handle_data(d, keep_path=keep_path)
            for d in items
        ]
        result_data['status'] = 'success'
        return result_data
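The data normalizer's get_outermost_items is not shown in this example. A minimal sketch of the idea (keep the manifests closest to the root, judged by path depth), assuming each mercator item carries a 'path' key; the real normalizer may differ:

    def get_outermost_items(items):
        """Sketch only: keep manifests with the smallest path depth."""
        def depth(item):
            return item['path'].rstrip('/').count('/')

        if not items:
            return []
        min_depth = min(depth(item) for item in items)
        return [item for item in items if depth(item) == min_depth]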
Example #11
    def run_mercator(self,
                     arguments,
                     cache_path,
                     keep_path=False,
                     outermost_only=True,
                     timeout=300,
                     resolve_poms=True):
        """Run mercator tool."""
        result_data = {'status': 'unknown', 'summary': [], 'details': []}
        mercator_target = arguments.get('cache_sources_path', cache_path)

        if arguments['ecosystem'] == 'go':
            # no Go support in Mercator-go yet; we handle it separately here
            tc = TimedCommand([
                'gofedlib-cli', '--dependencies-main',
                '--dependencies-packages', '--dependencies-test',
                '--skip-errors', mercator_target
            ])
            status, data, err = tc.run(timeout=timeout)
        else:
            tc = TimedCommand(['mercator', mercator_target])
            update_env = {
                'MERCATOR_JAVA_RESOLVE_POMS': 'true'
            } if resolve_poms else {}
            status, data, err = tc.run(timeout=timeout,
                                       is_json=True,
                                       update_env=update_env)
        if status != 0:
            self.log.error(err)
            raise FatalTaskError(err)

        ecosystem_object = self.storage.get_ecosystem(arguments['ecosystem'])
        if ecosystem_object.is_backed_by(EcosystemBackend.pypi):
            # TODO: attempt static setup.py parsing with mercator
            items = [self._merge_python_items(mercator_target, data)]
        elif arguments['ecosystem'] == 'go':
            result = {'result': json.loads(data[0])}
            # the data normalizer expects this
            result['ecosystem'] = 'gofedlib'
            # we only support git for now
            result['result']['code_repository'] = {
                'type': 'git',
                'url': 'https://{name}'.format(name=arguments.get('name'))
            }

            result['result']['name'] = arguments.get('name')
            result['result']['version'] = arguments.get('version')
            items = [result]
            main_deps_count = len(result['result'].get('deps-main', []))
            packages_count = len(result['result'].get('deps-packages', []))
            self.log.debug('gofedlib found %i dependencies',
                           main_deps_count + packages_count)
        else:
            if outermost_only:
                # process only root level manifests (or the ones closest to the root level)
                items = self._data_normalizer.get_outermost_items(
                    data.get('items') or [])
            else:
                items = data.get('items') or []
            self.log.debug('mercator found %i projects, outermost %i',
                           len(data.get('items') or []), len(items))

            if ecosystem_object.is_backed_by(EcosystemBackend.maven):
                # for maven we download both Jar and POM, we consider POM to be *the*
                #  source of information and don't want to duplicate info by including
                #  data from pom included in artifact (assuming it's included)
                items = [
                    d for d in items if d['ecosystem'].lower() == 'java-pom'
                ]

        result_data['details'] = [
            self._data_normalizer.handle_data(d, keep_path=keep_path)
            for d in items
        ]
        result_data['status'] = 'success'
        return result_data

    def execute(self, arguments):
        """Task code."""
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        eco = arguments['ecosystem']
        pkg = arguments['name']
        tool_responses = {}
        result_summary = {
            'package_names': [],
            'registered_srpms': [],
            'all_rhn_channels': [],
            'all_rhsm_content_sets': [],
            'all_rhsm_product_names': []
        }
        result_data = {'status': 'error',
                       'summary': result_summary,
                       'details': tool_responses
                       }

        # bail out early; unless we have access to internal services or the
        # package comes from the Maven ecosystem, we can't comment on downstream usage
        is_maven = Ecosystem.by_name(self.storage.session, eco).is_backed_by(EcosystemBackend.maven)
        if not self._is_inside_rh() and not is_maven:
            return result_data

        self.log.debug('Fetching {e}/{p} from Anitya'.format(e=eco, p=pkg))
        res = self._fetch_anitya_project(eco, pkg)
        anitya_rpm_names = []
        anitya_mvn_names = []
        if res is None:
            result_data['status'] = 'error'
        elif res.status_code == 200:
            self.log.debug('Retrieved {e}/{p} from Anitya'.format(e=eco, p=pkg))
            anitya_response = res.json()
            tool_responses['redhat_anitya'] = anitya_response
            # For now, we assume all downstreams are ones we care about
            for entry in anitya_response['packages']:
                if entry['distro'] == RH_RPM_DISTRO_NAME:
                    anitya_rpm_names.append(entry['package_name'])
                elif entry['distro'] == RH_MVN_DISTRO_NAME:
                    anitya_mvn_names.append(entry['package_name'])
                else:
                    self.log.warning(
                        'Unknown distro {d} for downstream package {o} (package {p}) in Anitya'.
                        format(d=entry['distro'], o=entry['package_name'], p=pkg)
                    )
            self.log.debug('Candidate RPM names from Anitya: {}'.format(anitya_rpm_names))
            self.log.debug('Candidate MVN names from Anitya: {}'.format(anitya_mvn_names))
            # TODO: Report 'partial' here and switch to 'success' at the end
            result_data['status'] = 'success'
        else:
            msg = 'Failed to find Anitya project {e}/{p}. Anitya response: {r}'
            self.log.error(msg.format(e=eco, p=pkg, r=res.text))
            result_data['status'] = 'error'

        if self._is_inside_rh():
            # we have candidate downstream name mappings, check them against Brew
            seed_names = anitya_rpm_names or [self._prefix_package_name(pkg, eco)]
            self.log.debug('Checking candidate names in Brew: {}'.format(seed_names))

            args = ['brew-utils-cli', '--version', arguments['version']]
            artifact_hash = self._get_artifact_hash(algorithm='sha256')
            if artifact_hash:
                args += ['--digest', artifact_hash]
            args += seed_names

            self.log.debug("Executing command, timeout={timeout}: {cmd}".format(
                timeout=self._BREWUTILS_CLI_TIMEOUT,
                cmd=args))
            tc = TimedCommand(args)
            status, output, error = tc.run(timeout=self._BREWUTILS_CLI_TIMEOUT)
            self.log.debug("status = %s, error = %s", status, error)
            output = ''.join(output)
            self.log.debug("output = %s", output)
            if not output:
                raise TaskError("Error running command %s" % args)
            brew = json.loads(output)

            result_summary['package_names'] = brew['packages']
            result_summary['registered_srpms'] = brew['response']['registered_srpms']
            tool_responses['brew'] = brew['response']['brew']

            # we have SRPM details, fetch details on where the RPMs are shipped
            tool_responses['pulp_cdn'] = pulp_responses = []
            rhn_channels = set()
            rhsm_content_sets = set()
            rhsm_product_names = set()
            for srpm_summary in result_summary['registered_srpms']:
                srpm_filename = "{n}-{v}-{r}.src.rpm".format(n=srpm_summary['package_name'],
                                                             v=srpm_summary['version'],
                                                             r=srpm_summary['release'])
                cdn_metadata = self._get_cdn_metadata(srpm_filename)
                if cdn_metadata is None:
                    msg = 'Error getting shipping data for {e}/{p} SRPM: {srpm}'
                    self.log.error(msg.format(e=eco, p=pkg, srpm=srpm_filename))
                    continue
                pulp_responses.append(cdn_metadata)
                srpm_summary['published_in'] = cdn_metadata['rhsm_product_names']
                rhn_channels.update(cdn_metadata['rhn_channels'])
                rhsm_content_sets.update(cdn_metadata['rhsm_content_sets'])
                rhsm_product_names.update(cdn_metadata['rhsm_product_names'])
            result_summary['all_rhn_channels'] = sorted(rhn_channels)
            result_summary['all_rhsm_content_sets'] = sorted(rhsm_content_sets)
            result_summary['all_rhsm_product_names'] = sorted(rhsm_product_names)

        self._add_mvn_results(result_summary, anitya_mvn_names, arguments['version'])

        return result_data
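    # Assumed shape of the brew-utils-cli JSON consumed above, inferred only
    # from the keys this snippet accesses (the real tool may return more):
    #
    #     {"packages": [...],
    #      "response": {"registered_srpms": [{"package_name": ..., "version": ...,
    #                                         "release": ...}, ...],
    #                   "brew": {...}}}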