Example #1
def get_response(url, headers=None, sleep_time=2, retry_count=10):
    """Wrap requests which tries to get response.

    :param url: URL where to do the request
    :param headers: additional headers for request
    :param sleep_time: sleep time between retries
    :param retry_count: number of retries
    :return: content of response's json
    """
    try:
        for _ in range(retry_count):
            response = requests.get(url, headers=headers)
            response.raise_for_status()
            if response.status_code == 204:
                # json() below would otherwise fail with JSONDecodeError
                raise HTTPError('No content')
            response = response.json()
            if response:
                return response
            time.sleep(sleep_time)
        else:
            raise TaskError("Number of retries exceeded")
    except HTTPError as err:
        message = "Failed to get results from {url} with {err}".format(url=url, err=err)
        logger.error(message)
        raise TaskError(message) from err
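A minimal usage sketch for get_response; the URL and header below are hypothetical placeholders, not taken from the original code:

    headers = {'Authorization': 'token <GITHUB_TOKEN>'}  # hypothetical token header
    content = get_response('https://api.github.com/repos/org/repo',
                           headers=headers, sleep_time=2, retry_count=5)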
Example #2
    def create_repo_node_and_get_cve(self, github_repo, deps_list):
        """Create a repository node in the graphdb and create its edges to all deps.

        :param github_repo: repository url used as the repo node key
        :param deps_list: dict with 'direct' and 'transitive' dependency lists
        :return: {}, gremlin_response
        """
        gremlin_str = (
            "repo=g.V().has('repo_url', '{repo_url}').tryNext().orElseGet{{"
            "graph.addVertex('vertex_label', 'Repo', 'repo_url', '{repo_url}')}};"
            "g.V(repo).outE('has_dependency').drop().iterate();"
            "g.V(repo).outE('has_transitive_dependency').drop().iterate();".
            format(repo_url=github_repo))

        # Create an edge between repo -> direct dependencies
        for pkg in deps_list.get('direct'):
            ecosystem = pkg.split(':')[0]
            version = pkg.split(':')[-1]
            name = pkg.replace(ecosystem + ':', '').replace(':' + version, '')
            gremlin_str += (
                "ver=g.V().has('pecosystem', '{ecosystem}').has('pname', '{name}')."
                "has('version', '{version}');ver.hasNext() && "
                "g.V(repo).next().addEdge('has_dependency', ver.next());".
                format(ecosystem=ecosystem, name=name, version=version))

        # Create an edge between repo -> transitive dependencies
        for pkg in deps_list.get('transitive'):
            ecosystem = pkg.split(':')[0]
            version = pkg.split(':')[-1]
            name = pkg.replace(ecosystem + ':', '').replace(':' + version, '')
            gremlin_str += (
                "ver=g.V().has('pecosystem', '{ecosystem}').has('pname', '{name}')."
                "has('version', '{version}');ver.hasNext() && "
                "g.V(repo).next().addEdge('has_transitive_dependency', ver.next());"
                .format(ecosystem=ecosystem, name=name, version=version))

        # Traverse the Repo to Direct/Transitive dependencies that have CVE's and report them
        gremlin_str += (
            "g.V(repo).as('rp').outE('has_dependency','has_transitive_dependency')"
            ".as('ed').inV().as('epv').select('rp','ed','epv').by(valueMap(true));"
        )
        payload = {"gremlin": gremlin_str}
        try:
            rawresp = requests.post(url=GREMLIN_SERVER_URL_REST, json=payload)
            resp = rawresp.json()
            self.log.info('######## Gremlin Response %r' % resp)
            if rawresp.status_code != 200:
                raise TaskError(
                    "Error creating repository node for {repo_url} - "
                    "{resp}".format(repo_url=github_repo, resp=resp))

        except Exception:
            self.log.error(traceback.format_exc())
            raise TaskError(
                "Error creating repository node for {repo_url}".format(
                    repo_url=github_repo))

        return resp
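A note on the 'ecosystem:name:version' parsing above: Maven package names themselves contain ':' (groupId:artifactId), which is why the code strips the ecosystem prefix and the version suffix instead of relying on a fixed number of split fields. A small illustration with a hypothetical coordinate:

    pkg = 'maven:io.vertx:vertx-core:3.4.1'  # hypothetical input
    ecosystem = pkg.split(':')[0]            # 'maven'
    version = pkg.split(':')[-1]             # '3.4.1'
    name = pkg.replace(ecosystem + ':', '').replace(':' + version, '')
    assert name == 'io.vertx:vertx-core'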
Example #3
    def _find_blackduck_cli_root(self):
        """
        Find the base directory where the BlackDuck CLI got
        extracted

        :return: str, path to the CLI root
        """
        base = self.configuration.BLACKDUCK_PATH
        dirs = listdir(base)
        if not dirs:
            raise TaskError("Unable to find BlackDuck CLI directory")
        if len(dirs) > 1:
            raise TaskError("More than 1 BlackDuck CLI directory")

        return path.join(base, dirs.pop())
Example #4
    @classmethod
    def clone(cls, url, path, depth=None, branch=None, single_branch=False):
        """
        clone repository provided as url to specific path

        :param url: str
        :param path: str
        :param depth: str
        :param branch: str
        :return: instance of Git()
        """
        orig_url = url
        cls.config()
        # git clone doesn't understand urls starting with: git+ssh, git+http, git+https
        url = url2git_repo(url)
        cmd = ["git", "clone", url, path]
        if depth is not None:
            cmd.extend(["--depth", depth])
        if branch is not None:
            cmd.extend(["--branch", branch])
        if single_branch:
            cmd.extend(["--single-branch"])
        try:
            TimedCommand.get_command_output(cmd, graceful=False)
        except TaskError as exc:
            raise TaskError("Unable to clone: %s" % orig_url) from exc
        return cls(path=path)
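url2git_repo is not shown in this snippet; judging from the comment above, it normalizes 'git+'-prefixed URLs into plain URLs that git understands. A minimal sketch of such a helper, as an assumption rather than the project's actual implementation:

    def url2git_repo(url):
        """Strip the 'git+' prefix so plain git clone accepts the URL (sketch)."""
        return url[len('git+'):] if url.startswith('git+') else url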
Example #5
def get_command_output(args, graceful=True, is_json=False, **kwargs):
    """Improved version of subprocess.check_output.

    :param graceful: bool, if False, raise Exception when command fails
    :param is_json: bool, if True, return decoded json

    :return: list of strings, output which command emitted
    """
    logger.debug("running command %s", args)
    try:
        # Using universal_newlines mostly for the side-effect of decoding
        # the output as UTF-8 text on Python 3.x
        out = check_output(args, universal_newlines=True, **kwargs)
    except (CalledProcessError, TimeoutExpired) as ex:
        # TODO: we may want to use subprocess.Popen to be able to also print stderr here
        #  (while not mixing it with stdout that is returned if the subprocess succeeds)
        if isinstance(ex, TimeoutExpired):
            logger.warning("command %s timed out:\n%s", args, ex.output)
        else:
            logger.warning("command %s ended with %s\n%s", args, ex.returncode,
                           ex.output)

        if not graceful:
            logger.error("exception is fatal")
            raise TaskError("Error during running command %s: %r" %
                            (args, ex.output))
        else:
            logger.debug("Ignoring because graceful flag is set")
        return []
    else:
        if is_json:
            # FIXME: some error handling here would be great
            return json.loads(out)
        else:
            return [f for f in out.split('\n') if f]  # py2 & 3 compat
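A short usage sketch; the commands are only illustrations. The function returns stdout as a list of non-empty lines, or a decoded object when is_json=True:

    lines = get_command_output(['git', 'rev-parse', 'HEAD'], graceful=False)
    info = get_command_output(['npm', 'view', 'serve-static', '--json'], is_json=True)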
Example #6
    @staticmethod
    def extract_dependencies(github_repo, github_sha):
        """Extract the dependencies information.

        Currently assuming repository is maven/npm/python repository.

        :param github_repo: repository url
        :param github_sha: commit hash
        :return: set of direct (and indirect) dependencies
        """
        with TemporaryDirectory() as workdir:
            repo = Git.clone(url=github_repo, path=workdir, timeout=3600)
            repo.reset(revision=github_sha, hard=True)
            with cwd(repo.repo_path):
                # TODO: Make this task also work for files not present in root directory.

                # First change the package-lock.json to npm-shrinkwrap.json
                GithubDependencyTreeTask.change_package_lock_to_shrinkwrap()

                if peek(Path.cwd().glob("pom.xml")):
                    return GithubDependencyTreeTask.get_maven_dependencies()
                elif peek(Path.cwd().glob("npm-shrinkwrap.json")) \
                        or peek(Path.cwd().glob("package.json")):
                    return GithubDependencyTreeTask.get_npm_dependencies(
                        repo.repo_path)
                elif peek(Path.cwd().glob("requirements.txt")):
                    return GithubDependencyTreeTask.get_python_dependencies(
                        repo.repo_path)
                elif peek(Path.cwd().glob("glide.lock")):
                    return GithubDependencyTreeTask.get_go_glide_dependencies(
                        repo.repo_path)
                elif peek(Path.cwd().glob("Gopkg.lock")):
                    return GithubDependencyTreeTask.get_go_pkg_dependencies()
                else:
                    raise TaskError("Please provide maven or npm or "
                                    "python or Go repository for scanning!")
Example #7
    def execute(self, arguments=None):
        """Task code.

        :param arguments: dictionary with task arguments
        :return: {}, results
        """
        self.log.info("Arguments passed from flow: {}".format(arguments))
        self._strict_assert(arguments.get('service_token'))

        github_repo = arguments.get('github_repo').strip()
        dependencies = []
        repo_cves = []

        if arguments.get('epv_list'):
            # self._strict_assert(arguments.get('epv_list'))
            for epv in arguments.get('epv_list'):
                dependencies.append('{ecosystem}:{package}:{version}'.format(
                    ecosystem=epv.get('ecosystem'),
                    package=epv.get('name'),
                    version=epv.get('version')))
            self.log.info('######## Dependencies list: %r' % dependencies)
            try:
                repo_cves = self.get_cve(dependencies)
            except TaskError as err:
                raise TaskError('Failed to get CVEs') from err
        else:
            dependencies = GithubDependencyTreeTask.extract_dependencies(
                github_repo=github_repo, user_flow=True)
            self.log.info('######## Deps list %r' % dependencies)
            try:
                # forward only the available dependencies in the system. Unknown
                # dependencies are not going to be ingested for osioUserNotificationFlow.
                repo_cves = self.create_repo_node_and_get_cve(
                    github_repo, dependencies)
                self.log.info('######## repo_cves %r' % repo_cves)
            except TaskError as err:
                raise TaskError('Failed to create repo node') from err

        report = self.generate_report(repo_cves=repo_cves,
                                      deps_list=dependencies)
        return {
            'report': report,
            'service_token': arguments['service_token'],
            'dependencies': dependencies
        }
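For reference, the EPV flattening above turns each entry into an 'ecosystem:package:version' string; a hypothetical example:

    epv = {'ecosystem': 'npm', 'name': 'serve-static', 'version': '1.7.1'}  # hypothetical
    dep = '{ecosystem}:{package}:{version}'.format(ecosystem=epv.get('ecosystem'),
                                                   package=epv.get('name'),
                                                   version=epv.get('version'))
    # dep == 'npm:serve-static:1.7.1'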
Example #8
    def execute(self, arguments):
        """Register the project in Anitya and add its downstream mappings.

        :param arguments: dictionary with task arguments
        :return: None, this task doesn't save any data
        """
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))

        eco = arguments['ecosystem']
        pkg = arguments['name']
        homepage = self._get_project_homepage(eco, pkg)
        self.log.info('Registering project {e}/{p} to Anitya'.format(e=eco,
                                                                     p=pkg))
        res = self._create_anitya_project(eco, pkg, homepage)
        if res.status_code == 200:
            self.log.info(
                'Project {e}/{p} had already been registered to Anitya'.format(
                    e=eco, p=pkg))
        elif res.status_code == 201:
            self.log.info(
                'Project {e}/{p} was successfully registered to Anitya'.format(
                    e=eco, p=pkg))
        else:
            self.log.error(
                'Failed to create Anitya project {e}/{p}. Anitya response: {r}'
                .format(e=eco, p=pkg, r=res.text))
            return None
            # TODO: When we move to a proper workflow manager, we'll want to raise TaskError
            #  here instead of just logging an error. Right now we don't want a problem
            #  in AnityaTask to shut down the rest of analysis phases.
            # raise TaskError('Failed to create Anitya project {e}/{p}. Anitya response: {r}'.
            #                 format(e=eco, p=pkg, r=res.text))
        self.log.info('Project {e}/{p} created successfully'.format(e=eco,
                                                                    p=pkg))

        self.log.debug('About to add downstream mapping for %s/%s to Anitya' %
                       (eco, pkg))
        distro_pkgs = {}
        distro_pkgs.update([self._get_downstream_rpm_pkgs(eco, pkg)])
        if self.storage.get_ecosystem(eco).is_backed_by(
                EcosystemBackend.maven):
            distro_pkgs.update([self._get_downstream_mvn_pkgs(eco, pkg)])
        for distro, package_names in distro_pkgs.items():
            for package_name in package_names:
                res = self._add_downstream_mapping(eco, pkg, distro,
                                                   package_name)
                if res.status_code == 200:
                    self.log.info(
                        'Downstream mapping %s/%s has already been added to project %s'
                        % (distro, package_name, pkg))
                elif res.status_code == 201:
                    self.log.info(
                        'Downstream mapping %s/%s was added to project %s' %
                        (distro, package_name, pkg))
                else:
                    raise TaskError(
                        'Failed to add downstream mapping %s/%s to project %s'
                        % (distro, package_name, pkg))

        # we don't want to save any data, so return None
        return None
Example #9
    @staticmethod
    def _resolve_dependency(ecosystem, dep):
        """Resolve a dependency declaration to a concrete name and version."""
        ret = {
            'ecosystem': ecosystem.name,
            'declaration': dep,
            'resolved_at': json_serial(datetime.datetime.utcnow())
        }

        # first, if this is a Github dependency, return it right away (we don't resolve these yet)
        if ' ' in dep:
            # we have both package name and version (version can be an URL)
            name, spec = dep.split(' ', 1)
            if gh_dep.match(spec):
                ret['name'] = name
                ret['version'] = 'https://github.com/' + spec
            elif urllib.parse.urlparse(spec).scheme:
                ret['name'] = name
                ret['version'] = spec
        else:
            if gh_dep.match(dep):
                ret['name'] = 'https://github.com/' + dep
                ret['version'] = None
            elif urllib.parse.urlparse(dep).scheme:
                ret['name'] = dep
                ret['version'] = None

        if 'name' in ret:
            return ret

        # second, figure out what is the latest upstream version matching the spec and return it
        solver = get_ecosystem_solver(ecosystem)
        pkgspec = solver.solve([dep])

        if not pkgspec:
            raise TaskError("invalid dependency: {}".format(dep))

        package, version = pkgspec.popitem()
        if not version:
            raise TaskError("could not resolve {}".format(dep))

        ret['name'] = package
        ret['version'] = version
        return ret
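The scheme check above distinguishes URL declarations from plain package names: urlparse yields an empty scheme for a bare name. A runnable illustration with hypothetical inputs:

    import urllib.parse
    assert urllib.parse.urlparse('https://example.com/pkg.tar.gz').scheme == 'https'
    assert urllib.parse.urlparse('serve-static').scheme == ''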
Example #10
    @staticmethod
    def run_timed_command(cmd, file):
        """Run timed command and write output to file.

        :param cmd: command to run
        :param file: output file
        :return: None
        """
        timed_cmd = TimedCommand(cmd)
        status, output, _ = timed_cmd.run(timeout=3600)
        if status != 0 or not file.is_file():
            # all errors are in stdout, not stderr
            raise TaskError(output)
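A hypothetical usage sketch, pairing a maven command with the output file it is expected to produce (assumes run_timed_command is in scope):

    from pathlib import Path
    run_timed_command(['mvn', 'dependency:tree', '-DoutputFile=dependency-tree.txt'],
                      Path('dependency-tree.txt'))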
Example #11
    @classmethod
    def clone(cls,
              url,
              path,
              timeout=300,
              depth=None,
              branch=None,
              single_branch=False):
        """Clone repository provided as url to specific path.

        :param url: str
        :param path: str
        :param timeout: int
        :param depth: str
        :param branch: str
        :param single_branch: bool, only checkout single branch
        :return: instance of Git()
        """
        orig_url = url
        # git clone doesn't understand urls starting with: git+ssh, git+http, git+https
        url = url2git_repo(url)

        orig_path = path
        path = Path(path)
        mode = 0
        if path.is_dir():
            mode = path.stat().st_mode

        cmd = ["git", "clone", url, orig_path]
        if depth is not None:
            cmd.extend(["--depth", depth])
        if branch is not None:
            cmd.extend(["--branch", branch])
        if single_branch:
            cmd.extend(["--single-branch"])
        try:
            cls.config()
            TimedCommand.get_command_output(cmd,
                                            graceful=False,
                                            timeout=timeout)
        except TaskError as exc:
            if not path.is_dir() and mode:
                # 'git clone repo dir/' deletes (no way to turn this off) dir/ if cloning fails.
                # This might confuse caller of this method, so we recreate the dir on error here.
                try:
                    path.mkdir(mode)
                except OSError:
                    logger.error("Unable to re-create dir: %s", str(path))
            raise TaskError("Unable to clone: %s" % orig_url) from exc
        return cls(path=orig_path)
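A usage sketch; the URL and path are placeholders, and per the docstring depth is passed as a string:

    repo = Git.clone('git+https://github.com/org/repo.git', '/tmp/clone-dir',
                     depth='1', branch='master', single_branch=True)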
Example #12
    def execute(self, arguments):
        """Extract BlackDuck data once the Hub has finished processing.

        :param arguments: dictionary with task arguments
        :return: dict, results from the parent extraction task
        """
        hub = self._get_hub()

        self.log.info('Determining if data is already available at BD Hub ...')
        if not self._data_ready(hub, self._get_project_name(arguments),
                                arguments['version']):
            self.log.info('Data not available yet at BD Hub, retrying ...')
            raise BlackDuckDataNotReady(self._get_project_name(arguments),
                                        arguments['version'])
        self.log.info('Data is available at BD Hub, extracting ...')

        data = super().execute(arguments)
        if not data['details']:
            raise TaskError("No data from Hub")

        return data
Example #13
    def _collect_dependencies(self):
        """Return all dependencies for current analysis flow (operates on parent mercator result).

        :return: List[str], list of dependencies
        """
        wr = self.parent_task_result('metadata')
        if not isinstance(wr, dict):
            raise TaskError('metadata task result has unexpected type: {}; expected dict'.
                            format(type(wr)))

        # there can be details about multiple manifests in the metadata,
        # therefore we will collect dependency specifications from all of them
        # and exclude obvious duplicates along the way
        dependencies = list({dep for m in wr.get('details', []) if m.get('dependencies')
                             for dep in m.get('dependencies', [])})
        return dependencies
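The set comprehension above deduplicates dependency declarations across manifests; a small self-contained illustration with a hypothetical metadata result:

    wr = {'details': [{'dependencies': ['a 1.0', 'b 2.0']},
                      {'dependencies': ['b 2.0', 'c 3.0']}]}
    deps = list({dep for m in wr.get('details', []) if m.get('dependencies')
                 for dep in m.get('dependencies', [])})
    # deps holds 'a 1.0', 'b 2.0' and 'c 3.0' exactly once each (order unspecified)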
Example #14
    @staticmethod
    def extract_dependencies(github_repo, github_sha):
        """Extract the dependencies information.

        Currently assuming repository is maven repository.
        """
        with TemporaryDirectory() as workdir:
            repo = Git.clone(url=github_repo, path=workdir, timeout=3600)
            repo.reset(revision=github_sha, hard=True)
            with cwd(repo.repo_path):
                output_file = Path.cwd() / "dependency-tree.txt"
                cmd = ["mvn", "org.apache.maven.plugins:maven-dependency-plugin:3.0.2:tree",
                       "-DoutputType=dot",
                       "-DoutputFile={filename}".format(filename=output_file),
                       "-DappendOutput=true"]
                timed_cmd = TimedCommand(cmd)
                status, output, _ = timed_cmd.run(timeout=3600)
                if status != 0 or not output_file.is_file():
                    # all errors are in stdout, not stderr
                    raise TaskError(output)
                with output_file.open() as f:
                    return GithubDependencyTreeTask.parse_maven_dependency_tree(f.readlines())
Example #15
    @staticmethod
    def get_maven_dependencies():
        """Get direct and indirect dependencies from pom.xml by using maven dependency tree plugin.

        :return: set of direct and indirect dependencies
        """
        output_file = Path.cwd() / "dependency-tree.txt"
        cmd = [
            "mvn",
            "org.apache.maven.plugins:maven-dependency-plugin:3.0.2:tree",
            "-DoutputType=dot",
            "-DoutputFile={filename}".format(filename=output_file),
            "-DappendOutput=true"
        ]
        timed_cmd = TimedCommand(cmd)
        status, output, _ = timed_cmd.run(timeout=3600)
        if status != 0 or not output_file.is_file():
            # all errors are in stdout, not stderr
            raise TaskError(output)
        with output_file.open() as f:
            return GithubDependencyTreeTask.parse_maven_dependency_tree(
                f.readlines())
Example #16
    def execute(self, arguments):
        """Map the package to downstream Red Hat artifacts.

        :param arguments: dictionary with task arguments
        :return: {}, results
        """
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        eco = arguments['ecosystem']
        pkg = arguments['name']
        tool_responses = {}
        result_summary = {
            'package_names': [],
            'registered_srpms': [],
            'all_rhn_channels': [],
            'all_rhsm_content_sets': [],
            'all_rhsm_product_names': []
        }
        result_data = {'status': 'error',
                       'summary': result_summary,
                       'details': tool_responses
                       }

        # bail out early; unless we have access to internal services or the
        # package is from the Maven ecosystem, we can't comment on downstream usage
        is_maven = Ecosystem.by_name(self.storage.session, eco).is_backed_by(EcosystemBackend.maven)
        if not self._is_inside_rh() and not is_maven:
            return result_data

        self.log.debug('Fetching {e}/{p} from Anitya'.format(e=eco, p=pkg))
        res = self._fetch_anitya_project(eco, pkg)
        anitya_rpm_names = []
        anitya_mvn_names = []
        if res is None:
            result_data['status'] = 'error'
        elif res.status_code == 200:
            self.log.debug('Retrieved {e}/{p} from Anitya'.format(e=eco, p=pkg))
            anitya_response = res.json()
            tool_responses['redhat_anitya'] = anitya_response
            # For now, we assume all downstreams are ones we care about
            for entry in anitya_response['packages']:
                if entry['distro'] == RH_RPM_DISTRO_NAME:
                    anitya_rpm_names.append(entry['package_name'])
                elif entry['distro'] == RH_MVN_DISTRO_NAME:
                    anitya_mvn_names.append(entry['package_name'])
                else:
                    self.log.warning(
                        'Unknown distro {d} for downstream package {o} (package {p}) in Anitya'.
                        format(d=entry['distro'], o=entry['package_name'], p=pkg)
                    )
            self.log.debug('Candidate RPM names from Anitya: {}'.format(anitya_rpm_names))
            self.log.debug('Candidate MVN names from Anitya: {}'.format(anitya_mvn_names))
            # TODO: Report 'partial' here and switch to 'success' at the end
            result_data['status'] = 'success'
        else:
            msg = 'Failed to find Anitya project {e}/{p}. Anitya response: {r}'
            self.log.error(msg.format(e=eco, p=pkg, r=res.text))
            result_data['status'] = 'error'

        if self._is_inside_rh():
            # we have candidate downstream name mappings, check them against Brew
            seed_names = anitya_rpm_names or [self._prefix_package_name(pkg, eco)]
            self.log.debug('Checking candidate names in Brew: {}'.format(seed_names))

            args = ['brew-utils-cli', '--version', arguments['version']]
            artifact_hash = self._get_artifact_hash(algorithm='sha256')
            if artifact_hash:
                args += ['--digest', artifact_hash]
            args += seed_names

            self.log.debug("Executing command, timeout={timeout}: {cmd}".format(
                timeout=self._BREWUTILS_CLI_TIMEOUT,
                cmd=args))
            tc = TimedCommand(args)
            status, output, error = tc.run(timeout=self._BREWUTILS_CLI_TIMEOUT)
            self.log.debug("status = %s, error = %s", status, error)
            output = ''.join(output)
            self.log.debug("output = %s", output)
            if not output:
                raise TaskError("Error running command %s" % args)
            brew = json.loads(output)

            result_summary['package_names'] = brew['packages']
            result_summary['registered_srpms'] = brew['response']['registered_srpms']
            tool_responses['brew'] = brew['response']['brew']

            # we have SRPM details, fetch details on where the RPMs are shipped
            tool_responses['pulp_cdn'] = pulp_responses = []
            rhn_channels = set()
            rhsm_content_sets = set()
            rhsm_product_names = set()
            for srpm_summary in result_summary['registered_srpms']:
                srpm_filename = "{n}-{v}-{r}.src.rpm".format(n=srpm_summary['package_name'],
                                                             v=srpm_summary['version'],
                                                             r=srpm_summary['release'])
                cdn_metadata = self._get_cdn_metadata(srpm_filename)
                if cdn_metadata is None:
                    msg = 'Error getting shipping data for {e}/{p} SRPM: {srpm}'
                    self.log.error(msg.format(e=eco, p=pkg, srpm=srpm_filename))
                    continue
                pulp_responses.append(cdn_metadata)
                srpm_summary['published_in'] = cdn_metadata['rhsm_product_names']
                rhn_channels.update(cdn_metadata['rhn_channels'])
                rhsm_content_sets.update(cdn_metadata['rhsm_content_sets'])
                rhsm_product_names.update(cdn_metadata['rhsm_product_names'])
            result_summary['all_rhn_channels'] = sorted(rhn_channels)
            result_summary['all_rhsm_content_sets'] = sorted(rhsm_content_sets)
            result_summary['all_rhsm_product_names'] = sorted(rhsm_product_names)

        self._add_mvn_results(result_summary, anitya_mvn_names, arguments['version'])

        return result_data
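For clarity, the SRPM filename construction above follows the standard name-version-release pattern; a hypothetical example:

    srpm_summary = {'package_name': 'python-requests', 'version': '2.6.0', 'release': '1.el7'}
    filename = "{n}-{v}-{r}.src.rpm".format(n=srpm_summary['package_name'],
                                            v=srpm_summary['version'],
                                            r=srpm_summary['release'])
    # filename == 'python-requests-2.6.0-1.el7.src.rpm'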