def _run_analyzer(self, command, json_output=True):
    """Run command (analyzer); if JSON output is expected, parse it.

    :param command: command to be run (command with argument vector as array)
    :param json_output: True if the output should be parsed as JSON
    :return: status, output, error triplet
    """
    self.log.debug("Executing command, timeout={timeout}: {cmd}".format(
        timeout=self._CLI_TIMEOUT, cmd=command))
    cmd = TimedCommand(command)
    status, output, error = cmd.run(timeout=self._CLI_TIMEOUT)
    self.log.debug("status: %d, output: %s, error: %s", status, output, error)

    if status != 0:
        self.log.warning("Executing command failed, return value: %d, stderr: '%s'",
                         status, error)

    # Some tools such as complexity-report write zero bytes to output (they are
    # propagated from sources, e.g. npm/glob/7.0.3). This caused failures when
    # pushing results to Postgres, as Postgres cannot store null bytes in
    # results. Let's be safe here.
    output = [line.replace('\\u0000', '\\\\0') for line in output]

    if json_output:
        if output:
            output = "".join(output)
            output = json.loads(output)
        else:
            output = {}

    return status, output, error
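# Usage sketch (not from the original source): inside some task method that
# defines _CLI_TIMEOUT, 'some-analyzer' stands in for any CLI tool that prints
# a JSON document to stdout, and source_path is a hypothetical path to the
# unpacked sources.
status, parsed, error = self._run_analyzer(['some-analyzer', '--json', source_path])
if status == 0:
    self.log.debug("analyzer result keys: %s", list(parsed.keys()))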
def execute(self, arguments):
    """Task code.

    :param arguments: dictionary with task arguments
    :return: {}, results
    """
    self._strict_assert(arguments.get('ecosystem'))
    self._strict_assert(arguments.get('name'))
    self._strict_assert(arguments.get('version'))

    cache_path = ObjectCache.get_from_dict(arguments).get_source_tarball()
    results = []
    for path in get_all_files_from(cache_path, path_filter=skip_git_files):
        self.log.debug("path = %s", path)

        bw = TimedCommand(['binwalk', '-B', path])
        status, output, error = bw.run(timeout=60)
        self.log.debug("status = %s, error = %s", status, error)
        self.log.debug("output = %s", output)

        parsed_binwalk = self.parse_binwalk(output)
        results.append({
            "path": os.path.relpath(path, cache_path),
            "output": parsed_binwalk,
        })
    return {'summary': [], 'status': 'success', 'details': results}
def run_timed_command(cmd, file):
    """Run timed command and write output to file.

    :param cmd: command to run
    :param file: output file
    :return: None
    :raises TaskError: if the command failed or produced no output file
    """
    timed_cmd = TimedCommand(cmd)
    status, output, _ = timed_cmd.run(timeout=3600)
    if status != 0 or not file.is_file():
        # all errors are in stdout, not stderr
        raise TaskError(output)
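# Illustrative reuse (mirrors get_maven_dependencies below); this sketch
# assumes a pom.xml in the current working directory and passes the output
# file as a pathlib.Path so that file.is_file() works inside run_timed_command.
from pathlib import Path

output_file = Path.cwd() / "dependency-tree.txt"
run_timed_command(
    ["mvn",
     "org.apache.maven.plugins:maven-dependency-plugin:3.0.2:tree",
     "-DoutputType=dot",
     "-DoutputFile={filename}".format(filename=output_file)],
    output_file)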
def run_scancode(scan_path):
    """Run scancode tool."""
    result_data = {
        'status': 'unknown',
        'summary': {},
        'details': {},
        'command': 'N/A'
    }
    command = [
        path.join(configuration.SCANCODE_PATH, 'scancode'),
        # Scan for licenses
        '--license',
        # Do not return license matches with scores lower than this score
        '--license-score', configuration.SCANCODE_LICENSE_SCORE,
        # Files without findings are omitted
        '--only-findings',
        # Use n parallel processes
        '--processes', configuration.SCANCODE_PROCESSES,
        # Do not print summary or progress messages
        '--quiet',
        # Strip the root directory segment of all paths
        '--strip-root',
        # Stop scanning a file if scanning takes longer than a timeout in seconds
        '--timeout', configuration.SCANCODE_TIMEOUT,
        scan_path
    ]
    for ignore_pattern in configuration.SCANCODE_IGNORE:
        command += ['--ignore', '{}'.format(ignore_pattern)]

    with username():
        tc = TimedCommand(command)
        status, output, error = tc.run(is_json=True, timeout=1200)
    if status != 0:
        return {
            "status": status,
            "output": output,
            "error": error,
            "command": command
        }

    details = LicenseCheckTask.process_output(output)
    result_data['details'] = details
    result_data['status'] = 'success'
    result_data['summary'] = {'sure_licenses': list(details['licenses'].keys())}
    result_data['command'] = command
    return result_data
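# Hypothetical call site; the sources path is made up. On a non-zero exit the
# function returns the raw status/output/error dict, otherwise a result dict
# whose summary lists the detected licenses.
result = run_scancode('/tmp/extracted-package-sources')
if result.get('status') == 'success':
    print(result['summary']['sure_licenses'])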
def execute(self, arguments):
    self._strict_assert(arguments.get('ecosystem'))
    self._strict_assert(arguments.get('name'))
    self._strict_assert(arguments.get('version'))

    result_data = {'status': 'unknown', 'summary': [], 'details': {}}
    if self._is_valid_ecosystem(arguments['ecosystem']):
        hub = self._get_hub()

        # BlackDuck projects don't have a notion of ecosystem, so we need to
        # namespace the project names ourselves; for package `crumb` in the
        # npm ecosystem we'll end up with the project name `npm-crumb`
        project = self._get_project_name(arguments)
        version = arguments['version']

        # Check if the given project has already been scanned
        data = self._release_data(hub, project, version)

        if not data and self._allow_cli_scan:
            self.log.debug("No data available for project {p} {v}".format(
                p=project, v=version))
            # No data available, issue a new scan and re-query release data
            source_tarball_path = ObjectCache.get_from_dict(arguments).get_source_tarball()
            command = self._prepare_command(project, version, source_tarball_path)
            self.log.debug("Executing command, timeout={timeout}: {cmd}".format(
                timeout=self._BLACKDUCK_CLI_TIMEOUT, cmd=command))
            bd = TimedCommand(command)
            status, output, error = bd.run(
                timeout=self._BLACKDUCK_CLI_TIMEOUT,
                update_env={'BD_HUB_PASSWORD': self.configuration.BLACKDUCK_PASSWORD})
            self.log.debug("status = %s, error = %s", status, error)
            self.log.debug("output = %s", output)
            data = self._release_data(hub, project, version)

        self.log.debug("Release data for project {p} {v}: {d}".format(
            p=project, v=version, d=data))
        result_data['details'] = data
        result_data['status'] = 'success' if data else 'error'
    else:
        result_data['status'] = 'error'

    return result_data
def run_gofedlib(self, topdir, timeout):
    """Run gofedlib-cli to extract dependencies from golang sources."""
    tc = TimedCommand(['gofedlib-cli', '--dependencies-main', '--dependencies-packages',
                       '--dependencies-test', '--skip-errors', topdir])
    status, data, err = tc.run(timeout=timeout)
    if status:
        raise FatalTaskError('gofedlib-cli failed: {err}'.format(err=err))

    result = json.loads(data[0])
    main_deps_count = len(result.get('deps-main', []))
    packages_count = len(result.get('deps-packages', []))
    self.log.debug('gofedlib found %i dependencies', main_deps_count + packages_count)

    return [{'ecosystem': 'gofedlib', 'result': result}]
def run_gofedlib(self, topdir, name, version, timeout):
    """Run gofedlib-cli to extract dependencies from golang sources."""
    tc = TimedCommand(['gofedlib-cli', '--dependencies-main', '--dependencies-packages',
                       '--dependencies-test', '--skip-errors', topdir])
    status, data, err = tc.run(timeout=timeout)

    result = json.loads(data[0])
    main_deps_count = len(result.get('deps-main', []))
    packages_count = len(result.get('deps-packages', []))
    self.log.debug('gofedlib found %i dependencies', main_deps_count + packages_count)

    result['code_repository'] = {
        'type': 'git',
        'url': 'https://{name}'.format(name=name)
    }
    result['name'] = name
    result['version'] = version
    return [{'ecosystem': 'gofedlib', 'result': result}]
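# Illustrative shape of the 'result' dict returned above; only the keys are
# taken from the code, every value is made up.
example_result = {
    'deps-main': ['github.com/example/dep-a'],      # dependencies of the main package
    'deps-packages': ['github.com/example/dep-b'],  # dependencies of sub-packages
    'code_repository': {'type': 'git', 'url': 'https://github.com/example/project'},
    'name': 'github.com/example/project',
    'version': '1.0.0',
}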
def extract_dependencies(github_repo, github_sha):
    """Extract the dependencies information.

    Currently assuming the repository is a maven repository.
    """
    with TemporaryDirectory() as workdir:
        repo = Git.clone(url=github_repo, path=workdir, timeout=3600)
        repo.reset(revision=github_sha, hard=True)
        with cwd(repo.repo_path):
            output_file = Path.cwd() / "dependency-tree.txt"
            cmd = ["mvn",
                   "org.apache.maven.plugins:maven-dependency-plugin:3.0.2:tree",
                   "-DoutputType=dot",
                   "-DoutputFile={filename}".format(filename=output_file),
                   "-DappendOutput=true"]
            timed_cmd = TimedCommand(cmd)
            status, output, _ = timed_cmd.run(timeout=3600)
            if status != 0 or not output_file.is_file():
                # all errors are in stdout, not stderr
                raise TaskError(output)
            with output_file.open() as f:
                return GithubDependencyTreeTask.parse_maven_dependency_tree(f.readlines())
def get_maven_dependencies():
    """Get direct and indirect dependencies from pom.xml by using maven dependency tree plugin.

    :return: set of direct and indirect dependencies
    """
    output_file = Path.cwd() / "dependency-tree.txt"
    cmd = ["mvn",
           "org.apache.maven.plugins:maven-dependency-plugin:3.0.2:tree",
           "-DoutputType=dot",
           "-DoutputFile={filename}".format(filename=output_file),
           "-DappendOutput=true"]
    timed_cmd = TimedCommand(cmd)
    status, output, _ = timed_cmd.run(timeout=3600)
    if status != 0 or not output_file.is_file():
        # all errors are in stdout, not stderr
        raise TaskError(output)
    with output_file.open() as f:
        return GithubDependencyTreeTask.parse_maven_dependency_tree(f.readlines())
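# Hypothetical usage: the function must run from a directory containing a
# pom.xml; the checkout path is illustrative and cwd() is the same context
# manager used by extract_dependencies above.
with cwd('/tmp/checked-out-maven-project'):
    dependencies = get_maven_dependencies()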
def run_mercator(self, arguments, cache_path,
                 keep_path=False, outermost_only=True, timeout=300, resolve_poms=True):
    """Run mercator tool."""
    # TODO: reduce cyclomatic complexity
    result_data = {'status': 'unknown', 'summary': [], 'details': []}
    mercator_target = arguments.get('cache_sources_path', cache_path)

    tc = TimedCommand(['mercator', mercator_target])
    update_env = {'MERCATOR_JAVA_RESOLVE_POMS': 'true'} if resolve_poms else {}
    status, data, err = tc.run(timeout=timeout, is_json=True, update_env=update_env)
    if status != 0:
        self.log.error(err)
        raise FatalTaskError(err)

    ecosystem_object = self.storage.get_ecosystem(arguments['ecosystem'])
    if ecosystem_object.is_backed_by(EcosystemBackend.pypi):
        # TODO: attempt static setup.py parsing with mercator
        items = [self._merge_python_items(mercator_target, data)]
        if items == [None]:
            raise NotABugFatalTaskError('Found no usable PKG-INFO/metadata.json/requirements.txt')
    else:
        if outermost_only:
            # process only root-level manifests (or the ones closest to the root level)
            items = self._data_normalizer.get_outermost_items(data.get('items') or [])
        else:
            items = data.get('items') or []
        self.log.debug('mercator found %i projects, outermost %i', len(data), len(items))

        if ecosystem_object.is_backed_by(EcosystemBackend.maven):
            # for maven we download both the JAR and the POM; we consider the POM
            # to be *the* source of information and don't want to duplicate info
            # by including data from the pom included in the artifact
            # (assuming it's included)
            items = [d for d in items if d['ecosystem'].lower() == 'java-pom']
        elif ecosystem_object.is_backed_by(EcosystemBackend.npm):
            # ignore other metadata files, e.g. requirements.txt
            items = [d for d in items if d['ecosystem'].lower() == 'npm']
        elif arguments['ecosystem'] == 'go':
            items = [d for d in items if d['ecosystem'].lower() == 'go-glide']
            if not items:
                # Mercator found no Go Glide files, run gofedlib
                items = self.run_gofedlib(topdir=mercator_target,
                                          name=arguments.get('name'),
                                          version=arguments.get('version'),
                                          timeout=timeout)

    result_data['details'] = [self._data_normalizer.handle_data(d, keep_path=keep_path)
                              for d in items]
    result_data['status'] = 'success'
    return result_data
def run_mercator(self, arguments, cache_path,
                 keep_path=False, outermost_only=True, timeout=300, resolve_poms=True):
    """Run mercator tool."""
    result_data = {'status': 'unknown', 'summary': [], 'details': []}
    mercator_target = arguments.get('cache_sources_path', cache_path)

    if arguments['ecosystem'] == 'go':
        # no Go support in Mercator-go yet, so we handle Go separately here
        tc = TimedCommand(['gofedlib-cli', '--dependencies-main', '--dependencies-packages',
                           '--dependencies-test', '--skip-errors', mercator_target])
        status, data, err = tc.run(timeout=timeout)
    else:
        tc = TimedCommand(['mercator', mercator_target])
        update_env = {'MERCATOR_JAVA_RESOLVE_POMS': 'true'} if resolve_poms else {}
        status, data, err = tc.run(timeout=timeout, is_json=True, update_env=update_env)
    if status != 0:
        self.log.error(err)
        raise FatalTaskError(err)

    ecosystem_object = self.storage.get_ecosystem(arguments['ecosystem'])
    if ecosystem_object.is_backed_by(EcosystemBackend.pypi):
        # TODO: attempt static setup.py parsing with mercator
        items = [self._merge_python_items(mercator_target, data)]
    elif arguments['ecosystem'] == 'go':
        result = {'result': json.loads(data[0])}  # the data normalizer expects this
        result['ecosystem'] = 'gofedlib'
        # we only support git now
        result['result']['code_repository'] = {
            'type': 'git',
            'url': 'https://{name}'.format(name=arguments.get('name'))
        }
        result['result']['name'] = arguments.get('name')
        result['result']['version'] = arguments.get('version')
        items = [result]
        main_deps_count = len(result['result'].get('deps-main', []))
        packages_count = len(result['result'].get('deps-packages', []))
        self.log.debug('gofedlib found %i dependencies', main_deps_count + packages_count)
    else:
        if outermost_only:
            # process only root-level manifests (or the ones closest to the root level)
            items = self._data_normalizer.get_outermost_items(data.get('items') or [])
        else:
            items = data.get('items') or []
        self.log.debug('mercator found %i projects, outermost %i', len(data), len(items))

        if ecosystem_object.is_backed_by(EcosystemBackend.maven):
            # for maven we download both the JAR and the POM; we consider the POM
            # to be *the* source of information and don't want to duplicate info
            # by including data from the pom included in the artifact
            # (assuming it's included)
            items = [d for d in items if d['ecosystem'].lower() == 'java-pom']

    result_data['details'] = [self._data_normalizer.handle_data(d, keep_path=keep_path)
                              for d in items]
    result_data['status'] = 'success'
    return result_data
def execute(self, arguments):
    self._strict_assert(arguments.get('ecosystem'))
    self._strict_assert(arguments.get('name'))
    self._strict_assert(arguments.get('version'))

    eco = arguments['ecosystem']
    pkg = arguments['name']
    tool_responses = {}
    result_summary = {
        'package_names': [],
        'registered_srpms': [],
        'all_rhn_channels': [],
        'all_rhsm_content_sets': [],
        'all_rhsm_product_names': []
    }
    result_data = {
        'status': 'error',
        'summary': result_summary,
        'details': tool_responses
    }

    # bail out early; we need access to internal services, or the package must
    # be from the Maven ecosystem, otherwise we can't comment on downstream usage
    is_maven = Ecosystem.by_name(self.storage.session, eco).is_backed_by(EcosystemBackend.maven)
    if not self._is_inside_rh() and not is_maven:
        return result_data

    self.log.debug('Fetching {e}/{p} from Anitya'.format(e=eco, p=pkg))
    res = self._fetch_anitya_project(eco, pkg)
    anitya_rpm_names = []
    anitya_mvn_names = []
    if res is None:
        result_data['status'] = 'error'
    elif res.status_code == 200:
        self.log.debug('Retrieved {e}/{p} from Anitya'.format(e=eco, p=pkg))
        anitya_response = res.json()
        tool_responses['redhat_anitya'] = anitya_response
        # For now, we assume all downstreams are ones we care about
        for entry in anitya_response['packages']:
            if entry['distro'] == RH_RPM_DISTRO_NAME:
                anitya_rpm_names.append(entry['package_name'])
            elif entry['distro'] == RH_MVN_DISTRO_NAME:
                anitya_mvn_names.append(entry['package_name'])
            else:
                self.log.warning(
                    'Unknown distro {d} for downstream package {o} (package {p}) in Anitya'
                    .format(d=entry['distro'], o=entry['package_name'], p=pkg))
        self.log.debug('Candidate RPM names from Anitya: {}'.format(anitya_rpm_names))
        self.log.debug('Candidate MVN names from Anitya: {}'.format(anitya_mvn_names))
        # TODO: Report 'partial' here and switch to 'success' at the end
        result_data['status'] = 'success'
    else:
        msg = 'Failed to find Anitya project {e}/{p}. Anitya response: {r}'
        self.log.error(msg.format(e=eco, p=pkg, r=res.text))
        result_data['status'] = 'error'

    if self._is_inside_rh():
        # we have candidate downstream name mappings, check them against Brew
        seed_names = anitya_rpm_names or [self._prefix_package_name(pkg, eco)]
        self.log.debug('Checking candidate names in Brew: {}'.format(seed_names))

        args = ['brew-utils-cli', '--version', arguments['version']]
        artifact_hash = self._get_artifact_hash(algorithm='sha256')
        if artifact_hash:
            args += ['--digest', artifact_hash]
        args += seed_names

        self.log.debug("Executing command, timeout={timeout}: {cmd}".format(
            timeout=self._BREWUTILS_CLI_TIMEOUT, cmd=args))
        tc = TimedCommand(args)
        status, output, error = tc.run(timeout=self._BREWUTILS_CLI_TIMEOUT)
        self.log.debug("status = %s, error = %s", status, error)
        output = ''.join(output)
        self.log.debug("output = %s", output)
        if not output:
            raise TaskError("Error running command %s" % args)
        brew = json.loads(output)

        result_summary['package_names'] = brew['packages']
        result_summary['registered_srpms'] = brew['response']['registered_srpms']
        tool_responses['brew'] = brew['response']['brew']

        # we have SRPM details, fetch details on where the RPMs are shipped
        tool_responses['pulp_cdn'] = pulp_responses = []
        rhn_channels = set()
        rhsm_content_sets = set()
        rhsm_product_names = set()
        for srpm_summary in result_summary['registered_srpms']:
            srpm_filename = "{n}-{v}-{r}.src.rpm".format(n=srpm_summary['package_name'],
                                                         v=srpm_summary['version'],
                                                         r=srpm_summary['release'])
            cdn_metadata = self._get_cdn_metadata(srpm_filename)
            if cdn_metadata is None:
                msg = 'Error getting shipping data for {e}/{p} SRPM: {srpm}'
                self.log.error(msg.format(e=eco, p=pkg, srpm=srpm_filename))
                continue
            pulp_responses.append(cdn_metadata)
            srpm_summary['published_in'] = cdn_metadata['rhsm_product_names']
            rhn_channels.update(cdn_metadata['rhn_channels'])
            rhsm_content_sets.update(cdn_metadata['rhsm_content_sets'])
            rhsm_product_names.update(cdn_metadata['rhsm_product_names'])
        result_summary['all_rhn_channels'] = sorted(rhn_channels)
        result_summary['all_rhsm_content_sets'] = sorted(rhsm_content_sets)
        result_summary['all_rhsm_product_names'] = sorted(rhsm_product_names)

    self._add_mvn_results(result_summary, anitya_mvn_names, arguments['version'])

    return result_data
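# Illustrative shape of the brew-utils-cli JSON consumed above; only the keys
# are taken from the code, every value here is made up.
example_brew = {
    'packages': ['example-package'],
    'response': {
        'registered_srpms': [
            {'package_name': 'example-package', 'version': '1.0', 'release': '1.el7'}
        ],
        'brew': [],  # raw tool response, stored under tool_responses['brew']
    },
}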