예제 #1
 def test_compute_digest_for_empty_input(self):
     """Check compute_digest() against the known SHA-256 of empty input.

     The reference value is the published test vector for a zero-length
     message, see https://www.di-mgt.com.au/sha_testvectors.html
     """
     empty_source = "/dev/null"
     expected_digest = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

     # a digest has to be produced at all ...
     assert compute_digest(empty_source) is not None
     # ... and it has to match the reference vector
     assert compute_digest(empty_source) == expected_digest
예제 #2
    def compute_digests(self, cache_path, f, artifact=False):
        """Compute all digests recorded for a single file.

        :param cache_path: directory that paths of non-artifact files are
                           reported relative to
        :param f: path of the file to digest
        :param artifact: True when ``f`` is the artifact itself
        :return: dict with the 'sha256', 'sha1', 'md5', 'ssdeep' and 'path'
                 keys; the artifact additionally carries 'artifact': True
        """
        f_digests = {
            'sha256': compute_digest(f, 'sha256'),
            'sha1': compute_digest(f, 'sha1'),
            'md5': compute_digest(f, 'md5'),
            'ssdeep': self.compute_ssdeep(f),
        }

        if artifact:
            # the artifact is identified by its bare file name
            f_digests['artifact'] = True
            f_digests['path'] = os.path.basename(f)
        else:
            # every other file is reported relative to the cache directory
            f_digests['path'] = os.path.relpath(f, cache_path)

        return f_digests
    def test_execute(self, tmpdir):
        """Check that DigesterTask reports the same artifact digest as the fetcher.

        The artifact is fetched through IndianaJones, the 'digests' task is
        executed, and the SHA-256 reported for the artifact is compared with
        both the fetcher's digest and a fresh compute_digest() of the file.

        :param tmpdir: pytest temporary directory the artifact is fetched into
        """
        # NOTE(review): the name/version/target_dir arguments were cut off in the
        # original; they are reconstructed from the 'six-1.0.0.tar.gz' path
        # asserted at the end - confirm against IndianaJones.fetch_artifact().
        artifact_digest, artifact_path = IndianaJones.fetch_artifact(
            Ecosystem(name='pypi', backend=EcosystemBackend.pypi),
            'six', '1.0.0', str(tmpdir))

        args = dict.fromkeys(('ecosystem', 'name', 'version'), 'some-value')
        # flexmock(EPVCache).should_receive('get_extracted_source_tarball').and_return(str(tmpdir))
        task = DigesterTask.create_test_instance(task_name='digests')
        results = task.execute(arguments=args)

        assert results is not None
        assert isinstance(results, dict)
        assert set(results.keys()) == {'details', 'status', 'summary'}
        artifact_details = None
        for details in results['details']:
            # NOTE(review): the tail of this assertion was cut off in the original;
            # 'path' and the subset check are reconstructed - confirm.
            assert {'sha256', 'sha1', 'md5', 'ssdeep', 'path'}.issubset(set(details.keys()))
            if details.get('artifact'):
                artifact_details = details
        # there are artifact details
        assert artifact_details is not None
        # the artifact digest which Indy returns is the same as the one from DigesterTask
        assert artifact_digest == artifact_details['sha256'] == compute_digest(
            artifact_path)
        assert artifact_details['path'] == 'six-1.0.0.tar.gz'
예제 #4
    def fetch_maven_artifact(ecosystem, name, version, target_dir):
        """Fetch maven artifact from maven.org.

        :param ecosystem: ecosystem object; its ``fetch_url`` is the base URL
                          the artifact URL is joined onto
        :param name: Maven coordinates string, parsed by MavenCoordinates.from_str()
        :param version: artifact version, must not be empty
        :param target_dir: directory the artifact is downloaded and extracted into
        :return: tuple (digest, artifact_path) of the downloaded artifact
        :raises ValueError: when no version is provided
        :raises NotABugTaskError: when the coordinates are invalid or the
                                  download fails
        """
        git = Git.create_git(target_dir)
        artifact_coords = MavenCoordinates.from_str(name)
        if not version:
            # NOTE(review): the format argument was cut off in the original;
            # `name` is used as the only identifier known here - confirm.
            raise ValueError("No version provided for '%s'" % name)
        artifact_coords.version = version
        if not artifact_coords.is_valid():
            # NOTE(review): the format argument was cut off in the original;
            # `name` is used as a conservative stand-in - confirm.
            raise NotABugTaskError("Invalid Maven coordinates: {a}".format(a=name))

        maven_url = ecosystem.fetch_url
        artifact_url = urljoin(maven_url, artifact_coords.to_repo_url())
        local_filepath = IndianaJones.download_file(artifact_url, target_dir)
        if local_filepath is None:
            raise NotABugTaskError("Unable to download: %s" % artifact_url)

        # only the file name of the download is used; the artifact path is
        # rebuilt inside target_dir
        local_filename = os.path.basename(local_filepath)
        artifact_path = os.path.join(target_dir, local_filename)
        # the digest is taken before anything is unpacked next to the artifact
        digest = compute_digest(artifact_path)
        if artifact_coords.packaging != 'pom':
            Archive.extract(artifact_path, target_dir)
            if artifact_coords.packaging == 'aar':
                # 'aar' archive contains classes.jar, extract it too into target_dir
                classes_jar_path = os.path.join(target_dir, "classes.jar")
                if os.path.isfile(classes_jar_path):
                    Archive.extract(classes_jar_path, target_dir)

        return digest, artifact_path
예제 #5
    def fetch_pypi_artifact(name, version, target_dir):
        """Fetch Pypi artifact.

        :param name: package name, looked up through the PyPI JSON endpoint
        :param version: release to fetch; when empty, the version listed under
                        'info' in the JSON response is used
        :param target_dir: directory the release file is downloaded and
                           extracted into
        :return: tuple (digest, artifact_path) of the downloaded release file
        :raises RuntimeError: when the version has no release files or the
                              download fails
        """
        git = Git.create_git(target_dir)
        # NOTE: we can't download Python packages via pip, because it runs setup.py
        #  even with `pip download`. Therefore we could always get syntax errors
        #  because of older/newer syntax.
        res = requests.get('https://pypi.python.org/pypi/{n}/json'.format(n=name))
        # decode the response body once and reuse it for both lookups
        package_info = res.json()
        if not version:
            version = package_info['info']['version']
        release_files = package_info.get('releases', {}).get(version, [])
        if not release_files:
            raise RuntimeError("No release files for version %s" % version)

        # order in which we'd like to download:
        #  1) sdist
        #  2) wheels
        #  3) eggs
        #  4) anything else (creepy stuff)
        preference = {'sdist': 0, 'bdist_wheel': 1, 'bdist_egg': 2}

        def release_key(rel):
            return preference.get(rel['packagetype'], 3)

        # min() yields the first of equally ranked files, i.e. the same file a
        # stable sort would have placed at index 0
        file_url = min(release_files, key=release_key)['url']
        local_filename = IndianaJones.download_file(file_url, target_dir)
        if local_filename is None:
            raise RuntimeError("Unable to download: %s" % file_url)
        artifact_path = os.path.join(target_dir, local_filename)
        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)
        return digest, artifact_path
예제 #6
    def fetch_npm_artifact(name, version, target_dir):
        """Fetch npm artifact using system 'npm' tool.

        The package is added to the npm cache, its tarball is copied into
        ``target_dir`` and extracted there.

        :param name: npm package name
        :param version: package version; when empty, only the bare name is
                        passed to ``npm cache add``
        :param target_dir: directory the tarball is copied and extracted into
        :return: tuple (digest, artifact_path) of the tarball in target_dir
        """
        git = Git.create_git(target_dir)

        # $ npm config get cache
        # /root/.npm
        cache_path = TimedCommand.get_command_output(
            ['npm', 'config', 'get', 'cache'], graceful=False).pop()

        # add package to cache:
        # /root/.npm/express/
        # └── 4.13.4
        #      ├── package
        #      │   ├── History.md
        #      │   ├── index.js
        #      │   ├── lib
        #      │   ├── LICENSE
        #      │   ├── package.json
        #      │   └── Readme.md
        #      └── package.tgz
        # 3 directories, 6 files
        name_ver = name
        if version:
            name_ver = "{}@{}".format(name, version)
        # make sure the artifact is not in the cache yet
        # NOTE(review): the keyword arguments of this call were cut off in the
        # original; graceful=False mirrors the other npm invocations - confirm.
        TimedCommand.get_command_output(['npm', 'cache', 'clean', name],
                                        graceful=False)
        logger.info("downloading npm module %s", name_ver)
        npm_command = ['npm', 'cache', 'add', name_ver]
        TimedCommand.get_command_output(npm_command, graceful=False)

        # copy tarball to workpath
        tarball_name = "package.tgz"
        glob_path = os.path.join(cache_path, name, "*")
        cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
        artifact_path = os.path.join(cache_abs_path, tarball_name)
        logger.debug("[cache] tarball path = %s", artifact_path)
        artifact_path = shutil.copy(artifact_path, target_dir)

        logger.debug("[workdir] tarball path = %s", artifact_path)
        # Prior to npm-2.x.x (Fedora 24)
        # npm client was repackaging modules on download. It modified file permissions inside
        # package.tgz so they matched UID/GID of a user running npm command. Therefore its
        # digest was different than that of a tarball downloaded directly from registry.npmjs.org.
        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)

        # copy package/package.json over the extracted one,
        # because it contains (since npm >= 2.x.x) more information.
        npm_package_json = os.path.join(cache_abs_path, 'package',
                                        'package.json')
        shutil.copy(npm_package_json, target_dir)
        # copy package/npm-shrinkwrap.json to target_dir
        npm_shrinkwrap_json = os.path.join(target_dir, 'package',
                                           'npm-shrinkwrap.json')
        if os.path.isfile(npm_shrinkwrap_json):
            shutil.copy(npm_shrinkwrap_json, target_dir)
        return digest, artifact_path
예제 #7
 def fetch_nuget_artifact(name, version, target_dir):
     """Download a .nupkg from api.nuget.org and unpack it into target_dir.

     :param name: package name, lower-cased for the download URL
     :param version: package version, lower-cased for the download URL
     :param target_dir: directory the package is downloaded and extracted into
     :return: tuple (digest, artifact_path) of the downloaded package
     :raises RuntimeError: when the download fails
     """
     git = Git.create_git(target_dir)
     package = name.lower()
     release = version.lower()
     file_url = '{url}{name}.{version}.nupkg'.format(
         url='https://api.nuget.org/packages/', name=package, version=release)

     downloaded = IndianaJones.download_file(file_url, target_dir)
     if downloaded is None:
         raise RuntimeError("Unable to download: %s" % file_url)

     artifact_path = os.path.join(target_dir, downloaded)
     # the digest is computed before the archive is unpacked next to it
     digest = compute_digest(artifact_path)
     Archive.extract(artifact_path, target_dir)
     return digest, artifact_path
예제 #8
 def fetch_nuget_artifact(ecosystem, name, version, target_dir):
     """Fetch nuget artifact from the ecosystem's fetch URL and unpack it.

     :param ecosystem: ecosystem object; its ``fetch_url`` prefixes the
                       package file name
     :param name: package name, lower-cased for the download URL
     :param version: package version, lower-cased for the download URL
     :param target_dir: directory the package is downloaded and extracted into
     :return: tuple (digest, artifact_path) of the downloaded package
     :raises NotABugTaskError: when the download fails
     """
     git = Git.create_git(target_dir)
     base_url = ecosystem.fetch_url
     package = name.lower()
     release = version.lower()
     file_url = '{url}{name}.{version}.nupkg'.format(
         url=base_url, name=package, version=release)

     downloaded = IndianaJones.download_file(file_url, target_dir)
     if downloaded is None:
         raise NotABugTaskError("Unable to download: %s" % file_url)

     artifact_path = os.path.join(target_dir, downloaded)
     # the digest is computed before the archive is unpacked next to it
     digest = compute_digest(artifact_path)
     Archive.extract(artifact_path, target_dir)
     return digest, artifact_path
예제 #9
 def fetch_scm_artifact(name, version, target_dir):
     """Fetch a package with 'go get' and archive the requested version.

     :param name: package import path handed to ``go get -d``
     :param version: revision the checkout is hard-reset to and archived at
     :param target_dir: directory used as GOPATH for the download
     :return: tuple (digest, artifact_path) of the archive created by git
     """
     # run 'go get' with GOPATH pointing at target_dir, leaving os.environ untouched
     env = dict(os.environ)
     env['GOPATH'] = target_dir
     # NOTE(review): the keyword arguments of this call were cut off in the
     # original; env=env is reconstructed because `env` is otherwise unused and
     # graceful=False mirrors the sibling fetchers - confirm.
     TimedCommand.get_command_output(['go', 'get', '-d', name],
                                     graceful=False, env=env)
     package_dir = os.path.join(target_dir, 'src', name)
     with cwd(package_dir):
         git = Git(package_dir)
         git.reset(version, hard=True)
         artifact_filename = git.archive(version)
         artifact_path = os.path.join(package_dir, artifact_filename)
         digest = compute_digest(artifact_path)
         return digest, artifact_path
예제 #10
 def fetch_go_artifact(name, version, target_dir):
     """Fetch go artifact using 'go get' command.

     :param name: package import path handed to ``go get -d``
     :param version: revision the checkout is hard-reset to and archived at
     :param target_dir: directory used as GOPATH for the download
     :return: tuple (digest, artifact_path) of the archive created by git
     :raises NotABugTaskError: when the 'go get' command fails with TaskError
     """
     # run 'go get' with GOPATH pointing at target_dir, leaving os.environ untouched
     env = dict(os.environ)
     env['GOPATH'] = target_dir
     try:
         # NOTE(review): the keyword arguments of this call were cut off in the
         # original; env=env is reconstructed because `env` is otherwise unused
         # and graceful=False so that a failure surfaces as TaskError - confirm.
         TimedCommand.get_command_output(['go', 'get', '-d', name],
                                         graceful=False, env=env)
     except TaskError:
         raise NotABugTaskError('Unable to go-get {n}'.format(n=name))
     package_dir = os.path.join(target_dir, 'src', name)
     with cwd(package_dir):
         git = Git(package_dir)
         git.reset(version, hard=True)
         artifact_filename = git.archive(version)
         artifact_path = os.path.join(package_dir, artifact_filename)
         digest = compute_digest(artifact_path)
         return digest, artifact_path
예제 #11
    def fetch_rubygems_artifact(name, version, target_dir):
        """Fetch a rubygems package with 'gem fetch' and unpack it.

        :param name: gem name
        :param version: gem version; when empty, no ``--version`` is passed and
                        the downloaded file is located by globbing
        :param target_dir: directory the gem is fetched and extracted into
        :return: tuple (digest, artifact_path) of the downloaded .gem file
        """
        git = Git.create_git(target_dir)
        logger.info("downloading rubygems package %s-%s", name, version)
        gem_command = ['gem', 'fetch', name]
        if version:
            # the version restriction has to be part of the command that is run
            gem_command.extend(['--version', version])
        # 'gem fetch' is run from inside target_dir
        with cwd(target_dir):
            TimedCommand.get_command_output(gem_command, graceful=False)

        if not version:
            # if version is None we need to glob for the version that was downloaded
            artifact_path = os.path.abspath(
                glob.glob(os.path.join(target_dir, name + '*')).pop())
        else:
            artifact_path = os.path.join(
                target_dir, '{n}-{v}.gem'.format(n=name, v=version))

        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)
        return digest, artifact_path
예제 #12
    def fetch_npm_artifact(ecosystem, name, version, target_dir):
        """Fetch npm artifact using system 'npm' tool.

        The requested version is checked against the releases reported by
        NpmReleasesFetcher, the package is added to the npm cache, and its
        tarball is copied into ``target_dir`` and extracted there.

        :param ecosystem: ecosystem object; its ``fetch_url`` is passed to npm
                          as ``--registry``
        :param name: npm package name
        :param version: package version, must be among the fetched releases
        :param target_dir: directory the tarball is copied and extracted into
        :return: tuple (digest, artifact_path) of the tarball in target_dir
        :raises NotABugTaskError: when the version is not among the releases or
                                  fetching the releases raises ValueError
        """
        git = Git.create_git(target_dir)

        npm_cmd = ['npm', '--registry', ecosystem.fetch_url]

        # $ npm config get cache
        # /root/.npm
        cache_path = TimedCommand.get_command_output(
            npm_cmd + ['config', 'get', 'cache'], graceful=False).pop()

        # add package to cache:
        # /root/.npm/express/
        # └── 4.13.4
        #      ├── package
        #      │   ├── History.md
        #      │   ├── index.js
        #      │   ├── lib
        #      │   ├── LICENSE
        #      │   ├── package.json
        #      │   └── Readme.md
        #      └── package.tgz
        # 3 directories, 6 files
        name_ver = name

        try:
            # importing here to avoid circular dependency
            from f8a_worker.solver import NpmReleasesFetcher

            # NOTE(review): the argument and the result handling of this call
            # were cut off in the original; fetch_releases() is assumed to
            # return a (name, versions) pair - confirm against NpmReleasesFetcher.
            version_list = NpmReleasesFetcher(ecosystem).fetch_releases(
                name_ver)[1]
            if version not in version_list:
                raise NotABugTaskError(
                    "Provided version is not supported '%s'" % name)
            else:
                name_ver = "{}@{}".format(name, version)
        except ValueError as e:
            raise NotABugTaskError(
                'No versions for package NPM package {p} ({e})'.format(
                    p=name, e=str(e)))

        # make sure the artifact is not in the cache yet
        # NOTE(review): the keyword arguments of this call were cut off in the
        # original; graceful=False mirrors the other npm invocations - confirm.
        TimedCommand.get_command_output(npm_cmd + ['cache', 'clean', name],
                                        graceful=False)
        logger.info("downloading npm module %s", name_ver)
        cmd = npm_cmd + ['cache', 'add', name_ver]
        TimedCommand.get_command_output(cmd, graceful=False)

        # copy tarball to workpath
        tarball_name = "package.tgz"
        glob_path = os.path.join(cache_path, name, "*")
        cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
        artifact_path = os.path.join(cache_abs_path, tarball_name)
        logger.debug("[cache] tarball path = %s", artifact_path)
        artifact_path = shutil.copy(artifact_path, target_dir)

        logger.debug("[workdir] tarball path = %s", artifact_path)
        # Prior to npm-2.x.x (Fedora 24)
        # npm client was repackaging modules on download. It modified file permissions inside
        # package.tgz so they matched UID/GID of a user running npm command. Therefore its
        # digest was different than that of a tarball downloaded directly from registry.npmjs.org.
        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)
        Archive.fix_permissions(os.path.join(cache_abs_path, 'package'))

        # copy package/package.json over the extracted one,
        # because it contains (since npm >= 2.x.x) more information.
        npm_package_json = os.path.join(cache_abs_path, 'package',
                                        'package.json')
        shutil.copy(npm_package_json, target_dir)
        # copy package/npm-shrinkwrap.json to target_dir
        npm_shrinkwrap_json = os.path.join(target_dir, 'package',
                                           'npm-shrinkwrap.json')
        if os.path.isfile(npm_shrinkwrap_json):
            shutil.copy(npm_shrinkwrap_json, target_dir)
        return digest, artifact_path
예제 #13
 def test_compute_digest(self):
     """Check compute_digest() on a regular file and on a directory.

     A regular file must give a truthy digest. A directory must raise
     TaskError when raise_on_error is set and give None otherwise.
     """
     regular_file = "/etc/os-release"
     directory = "/"

     assert compute_digest(regular_file)

     with pytest.raises(TaskError):
         assert compute_digest(directory, raise_on_error=True)

     assert compute_digest(directory) is None