Example #1
 def _generate_pom_xml(to_solve):
     """
     Create pom.xml with dependencies from to_solve and run 'mvn versions:resolve-ranges',
     which resolves the version ranges (overwrites the pom.xml).
     :param to_solve: {"groupId:artifactId": "version-range"}
     """
     project = etree.Element('project')
     etree.SubElement(project, 'modelVersion').text = '4.0.0'
     etree.SubElement(project, 'groupId').text = 'foo.bar.baz'
     etree.SubElement(project, 'artifactId').text = 'testing'
     etree.SubElement(project, 'version').text = '1.0.0'
     dependencies = etree.SubElement(project, 'dependencies')
     for name, version_range in to_solve.items():
         group_id, artifact_id = name.rstrip(':').split(':')
         dependency = etree.SubElement(dependencies, 'dependency')
         etree.SubElement(dependency, 'groupId').text = group_id
         etree.SubElement(dependency, 'artifactId').text = artifact_id
         etree.SubElement(dependency, 'version').text = version_range
     with open('pom.xml', 'wb') as pom:
         pom.write(
             etree.tostring(project,
                            xml_declaration=True,
                            pretty_print=True))
     TimedCommand.get_command_output(['mvn', 'versions:resolve-ranges'],
                                     graceful=False)
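For illustration, a call might look like the following sketch; the Maven coordinates and version ranges are hypothetical and only follow the "groupId:artifactId" / range format described in the docstring.

    # Hypothetical input: keys are "groupId:artifactId", values are version ranges.
    ranges_to_solve = {
        'org.apache.commons:commons-lang3': '[3.0,3.5)',
        'com.google.guava:guava': '[18.0,20.0)',
    }
    _generate_pom_xml(ranges_to_solve)
    # pom.xml in the current working directory now holds the concrete versions
    # chosen by 'mvn versions:resolve-ranges'.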
 def zip_file(file, archive, junk_paths=False):
     command = ['zip', archive, file]
     if junk_paths:
         # Store just the name of a saved file (junk the path), not directory names.
         # By default, zip will store the full path (relative to the current directory).
         command.extend(['--junk-paths'])
     TimedCommand.get_command_output(command)
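A minimal usage sketch, assuming the `zip` CLI is available; the paths are placeholders:

    # With junk_paths=True the archive stores just 'report.json',
    # not 'output/reports/report.json'.
    zip_file('output/reports/report.json', '/tmp/results.zip', junk_paths=True)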
    def _run_analyzer(self, command, json_output=True):
        """Run command (analyzer), if a JSON output is expected, parse it

        :param command: command to be run (command with argument vector as array)
        :param json_output: True if output should be parsed
        :return: status, output, error triplet
        """
        self.log.debug("Executing command, timeout={timeout}: {cmd}".format(
            timeout=self._CLI_TIMEOUT, cmd=command))
        cmd = TimedCommand(command)
        status, output, error = cmd.run(timeout=self._CLI_TIMEOUT)
        self.log.debug("status: %d, output: %s, error: %s", status, output,
                       error)

        if status != 0:
            self.log.warning(
                "Executing command failed, return value: %d, stderr: '%s' ",
                status, error)

        # Some tools such as complexity-report write zero bytes to output (they are propagated from sources like
        # for npm/glob/7.0.3). This caused failures when pushing results to Postgres as Postgres cannot store
        # null bytes in results. Let's be safe here.
        output = [line.replace('\u0000', '\\0') for line in output]

        if json_output:
            if output:
                output = "".join(output)
                output = json.loads(output)
            else:
                output = {}

        return status, output, error
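A hedged sketch of how `_run_analyzer` might be called from another method of the same task; the analyzer command below is illustrative, not taken from the source:

    status, output, error = self._run_analyzer(
        ['some-analyzer', '--json', '/tmp/extracted-sources'], json_output=True)
    if status == 0:
        # output is already a parsed dict here (or {} if the tool printed nothing)
        details = output.get('details', [])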
 def archive(self, basename):
     suffix = "tar.gz"
     filename = basename + "." + suffix
     TimedCommand.get_command_output([
         "git", "archive", "--format={}".format(suffix),
         "--output={}".format(filename), "HEAD"
     ])
     return filename
Example #5
    def add(self, path):
        """
        add path to index

        :param path: str
        """
        with cwd(self.repo_path):
            TimedCommand.get_command_output(["git", "add", path], graceful=False)
Example #6
    def commit(self, message='blank'):
        """
        commit git repository

        :param message: str, commit message
        """
        # --git-dir is #$%^&&
        # http://stackoverflow.com/questions/1386291/git-git-dir-not-working-as-expected
        with cwd(self.repo_path):
            TimedCommand.get_command_output(["git", "commit", "-m", message], graceful=False)
Example #7
 def extract_zip(target, dest, mkdest=False):
     if mkdest:
         try:
             os.mkdir(dest, mode=0o775)
         except FileExistsError:
             pass
     # -o: overwrite existing files without prompting
     TimedCommand.get_command_output(['unzip', '-o', '-d', dest, target])
     # Fix possibly wrong permissions in zip files that would prevent us from deleting files.
     TimedCommand.get_command_output(['chmod', '-R', 'u+rwX,g+rwX', dest])
    def create_git(cls, path):
        """
        initialize a new git repository at path

        :param path: str
        :return: instance of Git()
        """
        cls.config()
        TimedCommand.get_command_output(["git", "init", path], graceful=False)
        return cls(path=path)
    def clone(cls, url, path):
        """
        clone repository provided as url to specific path

        :param url: str
        :param path: str
        :return: instance of Git()
        """
        cls.config()
        TimedCommand.get_command_output(["git", "clone", url, path],
                                        graceful=False)
        return cls(path=path)
    def _use_maven_index_checker(self):
        maven_index_checker_dir = os.getenv('MAVEN_INDEX_CHECKER_PATH')
        target_dir = os.path.join(maven_index_checker_dir, 'target')

        s3 = StoragePool.get_connected_storage('S3MavenIndex')
        self.log.info('Fetching pre-built maven index from S3, if available.')
        s3.retrieve_index_if_exists(target_dir)

        index_range = '{}-{}'.format(self.count.min, self.count.max)
        command = [
            'java', '-Xmx768m', '-jar', 'maven-index-checker.jar', '-r',
            index_range
        ]
        with cwd(maven_index_checker_dir):
            output = TimedCommand.get_command_output(command,
                                                     is_json=True,
                                                     graceful=False,
                                                     timeout=1200)
            for idx, release in enumerate(output):
                name = '{}:{}'.format(release['groupId'],
                                      release['artifactId'])
                version = release['version']
                self.log.info("Scheduling #%d.", self.count.min + idx)
                self.analyses_selinon_flow(name, version)
        # index checker should clean up these dirs in /temp/ after itself, but better be sure
        for mindexerdir in glob.glob(
                os.path.join(gettempdir(), 'mindexer-ctxcentral-context*')):
            rmtree(mindexerdir)

        self.log.info('Storing pre-built maven index to S3')
        s3.store_index(target_dir)
        central_index_dir = os.path.join(target_dir, 'central-index')
        rmtree(central_index_dir)
Example #11
    def run_mercator(self,
                     arguments,
                     cache_path,
                     keep_path=False,
                     outermost_only=True,
                     timeout=300):
        result_data = {'status': 'unknown', 'summary': [], 'details': []}

        mercator_target = arguments.get('cache_sources_path', cache_path)
        tc = TimedCommand(['mercator', mercator_target])
        status, data, err = tc.run(
            timeout=timeout,
            is_json=True,
            update_env={'MERCATOR_JAVA_RESOLVE_POMS': 'true'})
        if status != 0:
            self.log.error(err)
            result_data['status'] = 'error'
            return result_data
        ecosystem_object = self.storage.get_ecosystem(arguments['ecosystem'])
        if ecosystem_object.is_backed_by(EcosystemBackend.pypi):
            # TODO: attempt static setup.py parsing with mercator
            items = [self._merge_python_items(mercator_target, data)]
        else:
            if outermost_only:
                # process only root level manifests (or the ones closest to the root level)
                items = self._data_normalizer.get_outermost_items(
                    data.get('items') or [])
            else:
                items = data.get('items') or []
            self.log.debug('mercator found %i projects, outermost %i',
                           len(data), len(items))

            if ecosystem_object.is_backed_by(EcosystemBackend.maven):
                # for maven we download both Jar and POM, we consider POM to be *the*
                #  source of information and don't want to duplicate info by including
                #  data from pom included in artifact (assuming it's included)
                items = [
                    data for data in items
                    if data['ecosystem'].lower() == 'java-pom'
                ]
        result_data['details'] = [
            self._data_normalizer.handle_data(data, keep_path=keep_path)
            for data in items
        ]

        result_data['status'] = 'success'
        return result_data
Example #12
    def add_and_commit_everything(self, message="blank"):
        """
        equiv of:

            git add .
            git commit -m everything

        :param message: str, commit message
        """
        # first we need to remove any .git dirs/files from the archive, they could contain
        #  directions that would break adding (e.g. Flask 0.10 contains .git with gitpath
        #  pointing to Mitsuhiko's home dir)
        TimedCommand.get_command_output(['find', self.repo_path, '-mindepth', '2', '-name', '.git',
                                        '-exec', 'rm', '-rf', '{}', ';'])
        # add everything
        self.add(self.repo_path)
        self.commit(message=message)
Example #13
    def execute(self, arguments):
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        result_data = {'status': 'unknown', 'summary': [], 'details': {}}

        if self._is_valid_ecosystem(arguments['ecosystem']):
            hub = self._get_hub()

            # BlackDuck project doesn't have a notion of ecosystem, so we need to
            # namespace the project names ourselves, so for package `crumb` in the NPM ecosystem
            # we'll end up with the name `npm-crumb`
            project = self._get_project_name(arguments)
            version = arguments['version']

            # Check if the given project had already been scanned
            data = self._release_data(hub, project, version)

            if not data and self._allow_cli_scan:
                self.log.debug("No data available for project {p} {v}".format(
                    p=project, v=version))
                # No data available, issue a new scan and re-query release data
                source_tarball_path = ObjectCache.get_from_dict(
                    arguments).get_source_tarball()
                command = self._prepare_command(project, version,
                                                source_tarball_path)
                self.log.debug(
                    "Executing command, timeout={timeout}: {cmd}".format(
                        timeout=self._BLACKDUCK_CLI_TIMEOUT, cmd=command))
                bd = TimedCommand(command)
                status, output, error = bd.run(
                    timeout=self._BLACKDUCK_CLI_TIMEOUT,
                    update_env={'BD_HUB_PASSWORD': config.blackduck_password})
                self.log.debug("status = %s, error = %s", status, error)
                self.log.debug("output = %s", output)
                data = self._release_data(hub, project, version)

            self.log.debug("Release data for project {p} {v}: {d}".format(
                p=project, v=version, d=data))
            result_data['details'] = data
            result_data['status'] = 'success' if data else 'error'
        else:
            result_data['status'] = 'error'

        return result_data
Example #14
    def clone(cls, url, path, depth=None, branch=None):
        """
        clone repository provided as url to specific path

        :param url: str
        :param path: str
        :param depth: str
        :param branch: str
        :return: instance of Git()
        """
        cls.config()
        cmd = ["git", "clone", url, path]
        if depth is not None:
            cmd.extend(["--depth", depth])
        if branch is not None:
            cmd.extend(["--branch", branch])
        TimedCommand.get_command_output(cmd, graceful=False)
        return cls(path=path)
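A usage sketch for a shallow, single-branch clone; the URL and path are placeholders. Note that the docstring types `depth` as str, so pass it as a string:

    repo = Git.clone('https://github.com/pallets/flask.git', '/tmp/flask',
                     depth='1', branch='master')
    # 'repo' is a Git() instance rooted at /tmp/flask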
        def worker(path):
            mime = TimedCommand.get_command_output(['file', path, '-b',
                                                    '-i']).pop()
            self.log.debug("%s mime = %s", path, mime)
            typ = TimedCommand.get_command_output(['file', path, '-b'])
            self.log.debug("%s filetype = %s", path, typ)

            linguist = None
            if 'charset=binary' not in mime:
                linguist = self._parse_linguist(
                    TimedCommand.get_command_output(['linguist', path]))
                self.log.debug("%s linguist output = %s", path, linguist)

            results.append({
                "type": typ,
                "output": linguist,
                "path": os.path.relpath(path, cache_path),
            })
 def compute_ssdeep(self, target):
     """ Compute SSdeep piece-wise linear hash of target """
     # 0 : ssdeep header
     # 1 : hash,filename
     data = TimedCommand.get_command_output(['ssdeep', '-c', '-s', target])
     try:
         return data[1].split(',')[0].strip()
     except IndexError:
         self.log.error("unable to compute ssdeep of %r", target)
         raise RuntimeError("can't compute digest of %r" % target)
Example #17
    def rev_parse(self, args=None):
        """
        :param args: arguments to pass to `git rev-parse`

        :return: [str], output from `git rev-parse`
        """

        cmd = ["git", "rev-parse"]
        if args:
            cmd.extend(args)

        with cwd(self.repo_path):
            return TimedCommand.get_command_output(cmd, graceful=False)
    def _get_snyk_vulndb(self):
        """
        :return: Snyk CVE DB (parsed JSON)
        """

        with tempdir() as vulndb_dir:
            # clone vulndb git repo
            self.log.debug("Cloning snyk/vulndb repo")
            Git.clone(self._VULNDB_GIT_REPO, vulndb_dir)
            with cwd(vulndb_dir):
                # install dependencies
                self.log.debug("Installing snyk/vulndb dependencies")
                TimedCommand.get_command_output(['npm', 'install'])
                # generate database (json in file)
                self.log.debug("Generating snyk/vulndb")
                TimedCommand.get_command_output([
                    os.path.join('cli', 'shrink.js'), 'data',
                    self._VULNDB_FILENAME
                ])
                # parse the JSON so we are sure that we have a valid JSON
                with open(self._VULNDB_FILENAME) as f:
                    return json.load(f)
Example #19
    def execute(self, arguments):
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        cache_path = ObjectCache.get_from_dict(arguments).get_source_tarball()

        results = []
        for path in get_all_files_from(cache_path, path_filter=skip_git_files):
            self.log.debug("path = %s", path)

            bw = TimedCommand(['binwalk', '-B', path])
            status, output, error = bw.run(timeout=60)
            self.log.debug("status = %s, error = %s", status, error)
            self.log.debug("output = %s", output)

            parsed_binwalk = self.parse_binwalk(output)
            results.append({
                "path": os.path.relpath(path, cache_path),
                "output": parsed_binwalk,
            })
        return {'summary': [], 'status': 'success', 'details': results}
Example #20
 def config():
     """
     configure git
     """
     user_name = configuration.git_user_name
     user_email = configuration.git_user_email
     if not TimedCommand.get_command_output(["git", "config", "--get", "user.name"]):
         TimedCommand.get_command_output(["git", "config", "--global", "user.name", user_name])
     if not TimedCommand.get_command_output(["git", "config", "--get", "user.email"]):
         TimedCommand.get_command_output(["git", "config", "--global", "user.email", user_email])
Example #21
 def extract_gem(target, dest):
     """
     extract target gem into $dest/sources and
             gemspec (renamed to rubygems-metadata.yaml) into $dest/metadata/
     """
     sources = os.path.join(dest, 'sources')
     metadata = os.path.join(dest, 'metadata')
     TimedCommand.get_command_output(['mkdir', '-p', sources, metadata])
     TimedCommand.get_command_output(['gem', 'unpack', target, '--target', sources])
     with cwd(metadata):
         # --spec ignores --target, so we need to cwd
         TimedCommand.get_command_output(['gem', 'unpack', target, '--spec'])
         metadatayaml = glob.glob('*.gemspec').pop()
         os.rename(metadatayaml, 'rubygems-metadata.yaml')
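A usage sketch with hypothetical paths, showing where the pieces end up:

    extract_gem('/tmp/downloads/rails-5.0.0.gem', '/tmp/out')
    # /tmp/out/sources/                          <- unpacked gem contents
    # /tmp/out/metadata/rubygems-metadata.yaml   <- renamed gemspec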
    def execute(self, arguments):
        """
        task code

        :param arguments: dictionary with arguments
        :return: {}, results
        """
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        try:
            cache_path = ObjectCache.get_from_dict(arguments).get_sources()
        except Exception as e:
            eco = arguments.get('ecosystem')
            pkg = arguments.get('name')
            ver = arguments.get('version')
            if arguments['ecosystem'] != 'maven':
                self.log.error(
                    'Could not get sources for package {e}/{p}/{v}'.format(
                        e=eco, p=pkg, v=ver))
                raise
            self.log.info('Could not get sources for maven package {p}/{v}, '
                          'will try to run on binary jar'.format(p=pkg, v=ver))
            cache_path = ObjectCache.get_from_dict(
                arguments).get_extracted_source_tarball()

        result_data = {'status': 'unknown', 'summary': {}, 'details': {}}
        try:
            result_data['details'] = TimedCommand.get_command_output(
                ['license_check.py', cache_path], graceful=False, is_json=True)
            result_data['status'] = result_data['details'].pop('status')
            result_data['summary'] = result_data['details'].pop('summary')
        except:
            self.log.exception("License scan failed")
            result_data['status'] = 'error'

        return result_data
Example #23
    def execute(self, arguments):
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        cache_path = ObjectCache.get_from_dict(arguments).get_extracted_source_tarball()

        results = {'status': 'unknown',
                   'summary': {},
                   'details': []}

        try:
            oscc = TimedCommand.get_command_output(['oscryptocatcher', '--subdir-in-result', cache_path],
                                                   graceful=False, is_json=True)

            self.log.debug("oscryptocatcher %s output: %s", cache_path, oscc)
            results['details'] = oscc['details']
            results['summary'] = oscc['summary']
            results['status'] = 'success'
        except:
            results['status'] = 'error'

        return results
Example #24
    def _run_owasp_dep_check(self, scan_path, experimental=False):
        def _clean_dep_check_tmp():
            for dcdir in glob.glob(os.path.join(gettempdir(), 'dctemp*')):
                rmtree(dcdir)

        s3 = StoragePool.get_connected_storage('S3OWASPDepCheck')
        depcheck = os.path.join(os.environ['OWASP_DEP_CHECK_PATH'], 'bin', 'dependency-check.sh')
        with tempdir() as temp_data_dir:
            retrieved = s3.retrieve_depcheck_db_if_exists(temp_data_dir)
            if not retrieved:
                self.log.debug('No cached OWASP Dependency-Check DB, generating fresh now ...')
                command = [depcheck, '--updateonly', '--data', temp_data_dir]
                # give DependencyCheck 30 minutes to download the DB
                TimedCommand.get_command_output(command, graceful=False, timeout=1800)
            report_path = os.path.join(temp_data_dir, 'report.xml')
            command = [depcheck,
                       '--noupdate',
                       '--format', 'XML',
                       '--project', 'test',
                       '--data', temp_data_dir,
                       '--scan', scan_path,
                       '--out', report_path]
            if experimental:
                command.extend(['--enableExperimental'])
            output = []
            try:
                self.log.debug('Running OWASP Dependency-Check to scan %s for vulnerabilities' %
                               scan_path)
                output = TimedCommand.get_command_output(command,
                                                         graceful=False,
                                                         timeout=600)  # 10 minutes
                with open(report_path) as r:
                    report_dict = anymarkup.parse(r.read())
            except (TaskError, FileNotFoundError) as e:
                _clean_dep_check_tmp()
                for line in output:
                    self.log.warning(line)
                self.log.exception(str(e))
                return {'summary': ['OWASP Dependency-Check scan failed'],
                        'status': 'error',
                        'details': []}
            # If the CVEDBSyncTask has never been run before, we just had to create the DB ourselves
            # Make the life easier for other workers and store it to S3
            s3.store_depcheck_db_if_not_exists(temp_data_dir)
            _clean_dep_check_tmp()


        results = []
        dependencies = report_dict.get('analysis', {}).get('dependencies', {}).get('dependency', [])
        if not isinstance(dependencies, list):
            dependencies = [dependencies]
        for dependency in dependencies:
            vulnerabilities = dependency.get('vulnerabilities', {}).get('vulnerability', [])
            if not isinstance(vulnerabilities, list):
                vulnerabilities = [vulnerabilities]
            for vulnerability in vulnerabilities:
                av = vulnerability.get('cvssAccessVector')
                av = av[0] if av else '?'
                ac = vulnerability.get('cvssAccessComplexity')
                ac = ac[0] if ac else '?'
                au = vulnerability.get('cvssAuthentication')
                au = au[0] if au else '?'
                c = vulnerability.get('cvssConfidentialImpact')
                c = c[0] if c else '?'
                i = vulnerability.get('cvssIntegrityImpact')
                i = i[0] if i else '?'
                a = vulnerability.get('cvssAvailabilityImpact')
                a = a[0] if a else '?'
                vector = "AV:{AV}/AC:{AC}/Au:{Au}/C:{C}/I:{I}/A:{A}".\
                    format(AV=av, AC=ac, Au=au, C=c, I=i, A=a)
                result = {
                    'cvss': {
                        'score': vulnerability.get('cvssScore'),
                        'vector': vector
                    }
                }
                references = vulnerability.get('references', {}).get('reference', [])
                if not isinstance(references, list):
                    references = [references]
                result['references'] = [r.get('url') for r in references]
                for field in ['severity', 'description']:
                    result[field] = vulnerability.get(field)
                result['id'] = vulnerability.get('name')
                results.append(result)

        return {'summary': [r['id'] for r in results],
                'status': 'success',
                'details': results}
    def fetch_artifact(ecosystem=None,
                       artifact=None,
                       version=None,
                       target_dir='.'):
        """
        download artifact from registry and process it

        :param ecosystem:
        :param artifact:
        :param version:
        :param target_dir:
        :return: tuple: (digest, artifact_path)
        """
        parsed = urlparse(artifact)
        digest = None
        artifact_path = None

        if ecosystem.is_backed_by(EcosystemBackend.pypi):
            git = Git.create_git(target_dir)
            # NOTE: we can't download Python packages via pip, because it runs setup.py
            #  even with `pip download`. Therefore we could always get syntax errors
            #  because of older/newer syntax.
            res = requests.get(
                'https://pypi.python.org/pypi/{a}/json'.format(a=artifact))
            res.raise_for_status()
            if not version:
                version = res.json()['info']['version']
            release_files = res.json()['releases'][version]

            # sort releases by order in which we'd like to download:
            #  1) sdist
            #  2) wheels
            #  3) eggs
            #  4) anything else (creepy stuff)
            def release_key(rel):
                return {
                    'sdist': 0,
                    'bdist_wheel': 1,
                    'bdist_egg': 2
                }.get(rel['packagetype'], 3)

            release_files = list(sorted(release_files, key=release_key))
            file_url = release_files[0]['url']
            local_filename = IndianaJones.download_file(file_url, target_dir)
            artifact_path = os.path.join(target_dir, local_filename)
            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.npm):
            git = Git.create_git(target_dir)

            # $ npm config get cache
            # /root/.npm
            cache_path = TimedCommand.get_command_output(
                ['npm', 'config', 'get', 'cache'], graceful=False).pop()

            # add package to cache:
            # /root/.npm/express/
            # └── 4.13.4
            #      ├── package
            #      │   ├── History.md
            #      │   ├── index.js
            #      │   ├── lib
            #      │   ├── LICENSE
            #      │   ├── package.json
            #      │   └── Readme.md
            #      └── package.tgz
            # 3 directories, 6 files
            name_ver = artifact
            if version:
                name_ver = "{}@{}".format(artifact, version)
            # make sure the artifact is not in the cache yet
            TimedCommand.get_command_output(
                ['npm', 'cache', 'clean', artifact], graceful=False)
            logger.info("downloading npm module %s", name_ver)
            npm_command = ['npm', 'cache', 'add', name_ver]
            TimedCommand.get_command_output(npm_command, graceful=False)

            # copy tarball to workpath
            tarball_name = "package.tgz"
            glob_path = os.path.join(cache_path, artifact, "*")
            cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
            artifact_path = os.path.join(cache_abs_path, tarball_name)
            logger.debug("[cache] tarball path = %s", artifact_path)
            artifact_path = shutil.copy(artifact_path, target_dir)

            logger.debug("[workdir] tarball path = %s", artifact_path)
            # Prior to npm-2.x.x (Fedora 24)
            # npm client was repackaging modules on download. It modified file permissions inside
            # package.tgz so they matched UID/GID of a user running npm command. Therefore its
            # digest was different from that of a tarball downloaded directly from registry.npmjs.org.
            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)

            # copy package/package.json over the extracted one,
            # because it contains (since npm >= 2.x.x) more information.
            npm_package_json = os.path.join(cache_abs_path, 'package',
                                            'package.json')
            shutil.copy(npm_package_json, target_dir)
            # copy package/npm-shrinkwrap.json to target_dir
            npm_shrinkwrap_json = os.path.join(target_dir, 'package',
                                               'npm-shrinkwrap.json')
            if os.path.isfile(npm_shrinkwrap_json):
                shutil.copy(npm_shrinkwrap_json, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.rubygems):
            git = Git.create_git(target_dir)
            logger.info("downloading rubygems package %s-%s", artifact,
                        version)
            version_arg = []
            if version:
                version_arg = ['--version', version]
            gem_command = ['gem', 'fetch', artifact]
            gem_command.extend(version_arg)
            with cwd(target_dir):
                TimedCommand.get_command_output(gem_command, graceful=False)

            if not version:
                # if version is None we need to glob for the version that was downloaded
                artifact_path = os.path.abspath(
                    glob.glob(os.path.join(target_dir, artifact + '*')).pop())
            else:
                artifact_path = os.path.join(
                    target_dir, '{n}-{v}.gem'.format(n=artifact, v=version))

            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.maven):
            git = Git.create_git(target_dir)
            artifact_coords = MavenCoordinates.from_str(artifact)
            # lxml can't handle HTTPS URLs
            maven_url = "http://repo1.maven.org/maven2/"
            if not version:
                version = mvn_find_latest_version(maven_url, artifact_coords)
            artifact_coords.version = version
            logger.info("downloading maven package %s",
                        artifact_coords.to_str())

            if not artifact_coords.is_valid():
                raise ValueError("Invalid Maven coordinates: {a}".format(
                    a=artifact_coords.to_str()))

            artifact_url = urljoin(maven_url, artifact_coords.to_repo_url())
            local_filename = IndianaJones.download_file(
                artifact_url, target_dir)
            if local_filename is None:
                raise RuntimeError("Unable to download: %s" % artifact_url)
            artifact_path = os.path.join(
                target_dir,
                os.path.split(artifact_coords.to_repo_url())[1])
            digest = compute_digest(artifact_path)
            if artifact_coords.packaging != 'pom':
                Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.scm):
            git = Git.clone(artifact, target_dir)
            digest = IndianaJones.get_revision(target_dir)
            artifact_path = git.archive(artifact)
        elif parsed:
            if parsed[0] == 'git' or parsed[2].endswith('.git'):
                git = Git.clone(artifact, target_dir)
                digest = IndianaJones.get_revision(target_dir)
                artifact_path = git.archive(artifact)

        return digest, artifact_path
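A hedged sketch of invoking `fetch_artifact` (assumed to be exposed on the `IndianaJones` helper referenced inside the function); the ecosystem object, package name and paths are placeholders:

    digest, artifact_path = IndianaJones.fetch_artifact(
        ecosystem=npm_ecosystem, artifact='serve-static',
        version='1.7.1', target_dir='/tmp/npm-serve-static')
    # digest is the tarball checksum, artifact_path points at the copied package.tgz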
 def get_revision(target_directory):
     """ Get digest of last commit """
     with cwd(target_directory):
         return TimedCommand.get_command_output(
             ['git', 'rev-parse', 'HEAD'], graceful=False).pop()
 def extract_tar(target, dest):
     TimedCommand.get_command_output(['tar', 'xf', target, '-C', dest])
 def extract_zip(target, dest):
     # -o: overwrite existing files without prompting
     TimedCommand.get_command_output(['unzip', '-o', '-d', dest, target])
     # Fix possibly wrong permissions in zip files that would prevent us from deleting files.
     TimedCommand.get_command_output(['chmod', '-R', 'u+rwX,g+rwX', dest])
 def _update_dep_check_db(self, data_dir):
     depcheck = os.path.join(os.environ['OWASP_DEP_CHECK_PATH'], 'bin',
                             'dependency-check.sh')
     self.log.debug('Updating OWASP Dependency-Check CVE DB')
     TimedCommand.get_command_output(
         [depcheck, '--updateonly', '--data', data_dir], timeout=1800)
    def execute(self, arguments):
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        eco = arguments['ecosystem']
        pkg = arguments['name']
        tool_responses = {}
        result_summary = {
            'package_names': [],
            'registered_srpms': [],
            'all_rhn_channels': [],
            'all_rhsm_content_sets': [],
            'all_rhsm_product_names': []
        }
        result_data = {'status': 'error',
                       'summary': result_summary,
                       'details': tool_responses
                       }

        # bail out early: unless we have access to internal services or the package
        # comes from the Maven ecosystem, we can't comment on downstream usage
        is_maven = Ecosystem.by_name(self.storage.session, eco).is_backed_by(EcosystemBackend.maven)
        if not self._is_inside_rh() and not is_maven:
            return result_data

        self.log.debug('Fetching {e}/{p} from Anitya'.format(e=eco, p=pkg))
        res = self._fetch_anitya_project(eco, pkg)
        anitya_rpm_names = []
        anitya_mvn_names = []
        if res is None:
            result_data['status'] = 'error'
        elif res.status_code == 200:
            self.log.debug('Retrieved {e}/{p} from Anitya'.format(e=eco, p=pkg))
            anitya_response = res.json()
            tool_responses['redhat_anitya'] = anitya_response
            # For now, we assume all downstreams are ones we care about
            for entry in anitya_response['packages']:
                if entry['distro'] == RH_RPM_DISTRO_NAME:
                    anitya_rpm_names.append(entry['package_name'])
                elif entry['distro'] == RH_MVN_DISTRO_NAME:
                    anitya_mvn_names.append(entry['package_name'])
                else:
                    self.log.warning(
                        'Unknown distro {d} for downstream package {o} (package {p}) in Anitya'
                        .format(d=entry['distro'], o=entry['package_name'], p=pkg))
            self.log.debug('Candidate RPM names from Anitya: {}'.format(anitya_rpm_names))
            self.log.debug('Candidate MVN names from Anitya: {}'.format(anitya_mvn_names))
            # TODO: Report 'partial' here and switch to 'success' at the end
            result_data['status'] = 'success'
        else:
            msg = 'Failed to find Anitya project {e}/{p}. Anitya response: {r}'
            self.log.error(msg.format(e=eco, p=pkg, r=res.text))
            result_data['status'] = 'error'

        if self._is_inside_rh():
            # we have candidate downstream name mappings, check them against Brew
            seed_names = anitya_rpm_names or [self._prefix_package_name(pkg, eco)]
            self.log.debug('Checking candidate names in Brew: {}'.format(seed_names))

            args = ['brew-utils-cli', '--version', arguments['version']]
            artifact_hash = self._get_artifact_hash(algorithm='sha256')
            if artifact_hash:
                args += ['--digest', artifact_hash]
            args += seed_names

            self.log.debug("Executing command, timeout={timeout}: {cmd}".format(timeout=self._BREWUTILS_CLI_TIMEOUT,
                                                                                cmd=args))
            tc = TimedCommand(args)
            status, output, error = tc.run(timeout=self._BREWUTILS_CLI_TIMEOUT)
            self.log.debug("status = %s, error = %s", status, error)
            output = ''.join(output)
            self.log.debug("output = %s", output)
            if not output:
                raise TaskError("Error running command %s" % args)
            brew = json.loads(output)

            result_summary['package_names'] = brew['packages']
            result_summary['registered_srpms'] = brew['response']['registered_srpms']
            tool_responses['brew'] = brew['response']['brew']

            # we have SRPM details, fetch details on where the RPMs are shipped
            tool_responses['pulp_cdn'] = pulp_responses = []
            rhn_channels = set()
            rhsm_content_sets = set()
            rhsm_product_names = set()
            for srpm_summary in result_summary['registered_srpms']:
                srpm_filename = "{n}-{v}-{r}.src.rpm".format(n=srpm_summary['package_name'],
                                                             v=srpm_summary['version'],
                                                             r=srpm_summary['release'])
                cdn_metadata = self._get_cdn_metadata(srpm_filename)
                if cdn_metadata is None:
                    msg = 'Error getting shipping data for {e}/{p} SRPM: {srpm}'
                    self.log.error(msg.format(e=eco, p=pkg, srpm=srpm_filename))
                    continue
                pulp_responses.append(cdn_metadata)
                srpm_summary['published_in'] = cdn_metadata['rhsm_product_names']
                rhn_channels.update(cdn_metadata['rhn_channels'])
                rhsm_content_sets.update(cdn_metadata['rhsm_content_sets'])
                rhsm_product_names.update(cdn_metadata['rhsm_product_names'])
            result_summary['all_rhn_channels'] = sorted(rhn_channels)
            result_summary['all_rhsm_content_sets'] = sorted(rhsm_content_sets)
            result_summary['all_rhsm_product_names'] = sorted(rhsm_product_names)

        self._add_mvn_results(result_summary, anitya_mvn_names, arguments['version'])

        return result_data