def compute_digests(self, cache_path, f, artifact=False):
    """Return a digest record (sha256/sha1/md5/ssdeep + path) for file *f*.

    When *artifact* is True the record is flagged with ``'artifact': True``
    and ``'path'`` holds only the file's basename; otherwise ``'path'`` is
    the location of *f* relative to *cache_path*.
    """
    # cryptographic digests share one helper; ssdeep has its own code path
    digests = {
        algorithm: compute_digest(f, algorithm)
        for algorithm in ('sha256', 'sha1', 'md5')
    }
    digests['ssdeep'] = self.compute_ssdeep(f)

    if artifact:
        digests['artifact'] = True
        digests['path'] = os.path.basename(f)
    else:
        digests['path'] = os.path.relpath(f, cache_path)

    return digests
Example #2
0
    def test_execute(self, tmpdir):
        """End-to-end check: DigesterTask digests agree with IndianaJones."""
        workdir = str(tmpdir)
        artifact_digest, artifact_path = IndianaJones.fetch_artifact(
            Ecosystem(name='pypi', backend=EcosystemBackend.pypi),
            artifact=PYPI_MODULE_NAME,
            version=PYPI_MODULE_VERSION,
            target_dir=workdir)

        # point the EPV cache at the freshly fetched artifact
        flexmock(EPVCache).should_receive(
            'get_extracted_source_tarball').and_return(workdir)
        flexmock(EPVCache).should_receive('get_source_tarball').and_return(
            artifact_path)

        task = DigesterTask.create_test_instance(task_name='digests')
        arguments = dict.fromkeys(('ecosystem', 'name', 'version'), 'some-value')
        results = task.execute(arguments=arguments)

        assert results is not None
        assert isinstance(results, dict)
        assert set(results.keys()) == {'details', 'status', 'summary'}

        required_keys = {'sha256', 'sha1', 'md5', 'ssdeep', 'path'}
        artifact_details = None
        for entry in results['details']:
            assert required_keys <= set(entry.keys())
            if entry.get('artifact'):
                artifact_details = entry

        # there are artifact details
        assert artifact_details is not None
        # the artifact digest which Indy returns is the same as the one from DigesterTask
        assert artifact_digest == artifact_details['sha256'] == compute_digest(
            artifact_path)
        assert artifact_details['path'] == 'six-1.0.0.tar.gz'
    def fetch_artifact(ecosystem=None,
                       artifact=None,
                       version=None,
                       target_dir='.'):
        """Download an artifact from its ecosystem's registry and process it.

        The artifact is downloaded into *target_dir*, its digest is
        computed, and for most backends the archive is extracted and the
        result committed into a fresh git repository in *target_dir*.

        :param ecosystem: Ecosystem instance; its backend selects the
                          download strategy below
        :param artifact: package name, Maven coordinates string, or a
                         git/SCM URL (for the scm/URL fallback branches)
        :param version: version to fetch; when falsy, a backend-specific
                        "latest" is resolved (pypi, maven) or the downloaded
                        file is located by globbing (rubygems)
        :param target_dir: directory to download/extract into
        :return: tuple: (digest, artifact_path)
        """
        # parsed is only consulted by the URL fallback branch at the bottom
        parsed = urlparse(artifact)
        digest = None
        artifact_path = None

        if ecosystem.is_backed_by(EcosystemBackend.pypi):
            git = Git.create_git(target_dir)
            # NOTE: we can't download Python packages via pip, because it runs setup.py
            #  even with `pip download`. Therefore we could always get syntax errors
            #  because of older/newer syntax.
            # Query PyPI's JSON API for release metadata instead.
            res = requests.get(
                'https://pypi.python.org/pypi/{a}/json'.format(a=artifact))
            res.raise_for_status()
            if not version:
                # no explicit version requested -> take the current release
                version = res.json()['info']['version']
            release_files = res.json()['releases'][version]

            # sort releases by order in which we'd like to download:
            #  1) sdist
            #  2) wheels
            #  3) eggs
            #  4) anything else (creepy stuff)
            def release_key(rel):
                # lower key sorts first; unknown package types go last
                return {
                    'sdist': 0,
                    'bdist_wheel': 1,
                    'bdist_egg': 2
                }.get(rel['packagetype'], 3)

            release_files = list(sorted(release_files, key=release_key))
            # pick the most-preferred release file
            file_url = release_files[0]['url']
            local_filename = IndianaJones.download_file(file_url, target_dir)
            artifact_path = os.path.join(target_dir, local_filename)
            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.npm):
            git = Git.create_git(target_dir)

            # $ npm config get cache
            # /root/.npm
            cache_path = TimedCommand.get_command_output(
                ['npm', 'config', 'get', 'cache'], graceful=False).pop()

            # add package to cache:
            # /root/.npm/express/
            # └── 4.13.4
            #      ├── package
            #      │   ├── History.md
            #      │   ├── index.js
            #      │   ├── lib
            #      │   ├── LICENSE
            #      │   ├── package.json
            #      │   └── Readme.md
            #      └── package.tgz
            # 3 directories, 6 files
            name_ver = artifact
            if version:
                name_ver = "{}@{}".format(artifact, version)
            # make sure the artifact is not in the cache yet
            TimedCommand.get_command_output(
                ['npm', 'cache', 'clean', artifact], graceful=False)
            logger.info("downloading npm module %s", name_ver)
            npm_command = ['npm', 'cache', 'add', name_ver]
            TimedCommand.get_command_output(npm_command, graceful=False)

            # copy tarball to workpath
            tarball_name = "package.tgz"
            # glob for the single version directory npm just created
            glob_path = os.path.join(cache_path, artifact, "*")
            cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
            artifact_path = os.path.join(cache_abs_path, tarball_name)
            logger.debug("[cache] tarball path = %s", artifact_path)
            artifact_path = shutil.copy(artifact_path, target_dir)

            logger.debug("[workdir] tarball path = %s", artifact_path)
            # Prior to npm-2.x.x (Fedora 24)
            # npm client was repackaging modules on download. It modified file permissions inside
            # package.tgz so they matched UID/GID of a user running npm command. Therefore its
            # digest was different then of a tarball downloaded directly from registry.npmjs.org.
            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)

            # copy package/package.json over the extracted one,
            # because it contains (since npm >= 2.x.x) more information.
            npm_package_json = os.path.join(cache_abs_path, 'package',
                                            'package.json')
            shutil.copy(npm_package_json, target_dir)
            # copy package/npm-shrinkwrap.json to target_dir
            npm_shrinkwrap_json = os.path.join(target_dir, 'package',
                                               'npm-shrinkwrap.json')
            if os.path.isfile(npm_shrinkwrap_json):
                shutil.copy(npm_shrinkwrap_json, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.rubygems):
            git = Git.create_git(target_dir)
            logger.info("downloading rubygems package %s-%s", artifact,
                        version)
            version_arg = []
            if version:
                version_arg = ['--version', version]
            gem_command = ['gem', 'fetch', artifact]
            gem_command.extend(version_arg)
            # `gem fetch` writes into the current working directory
            with cwd(target_dir):
                TimedCommand.get_command_output(gem_command, graceful=False)

            if not version:
                # if version is None we need to glob for the version that was downloaded
                artifact_path = os.path.abspath(
                    glob.glob(os.path.join(target_dir, artifact + '*')).pop())
            else:
                artifact_path = os.path.join(
                    target_dir, '{n}-{v}.gem'.format(n=artifact, v=version))

            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.maven):
            git = Git.create_git(target_dir)
            artifact_coords = MavenCoordinates.from_str(artifact)
            # lxml can't handle HTTPS URLs
            maven_url = "http://repo1.maven.org/maven2/"
            if not version:
                version = mvn_find_latest_version(maven_url, artifact_coords)
            artifact_coords.version = version
            logger.info("downloading maven package %s",
                        artifact_coords.to_str())

            if not artifact_coords.is_valid():
                raise ValueError("Invalid Maven coordinates: {a}".format(
                    a=artifact_coords.to_str()))

            artifact_url = urljoin(maven_url, artifact_coords.to_repo_url())
            local_filename = IndianaJones.download_file(
                artifact_url, target_dir)
            if local_filename is None:
                raise RuntimeError("Unable to download: %s" % artifact_url)
            artifact_path = os.path.join(
                target_dir,
                os.path.split(artifact_coords.to_repo_url())[1])
            digest = compute_digest(artifact_path)
            # a .pom is a plain XML file, nothing to extract
            if artifact_coords.packaging != 'pom':
                Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.scm):
            # for SCM the "digest" is the checked-out revision id
            git = Git.clone(artifact, target_dir)
            digest = IndianaJones.get_revision(target_dir)
            artifact_path = git.archive(artifact)
        elif parsed:
            # NOTE(review): urlparse() always returns a (truthy) 6-tuple, so
            # this elif is effectively an unconditional fallback — confirm intent
            if parsed[0] == 'git' or parsed[2].endswith('.git'):
                git = Git.clone(artifact, target_dir)
                digest = IndianaJones.get_revision(target_dir)
                artifact_path = git.archive(artifact)

        return digest, artifact_path
 def test_compute_digest(self):
     """compute_digest: digest for readable files, error handling for bad paths."""
     # a regular readable file yields a truthy digest
     assert compute_digest("/etc/os-release")
     # with raise_on_error=True an undigestable path ("/" is a directory) raises;
     # the original wrapped this call in a redundant `assert` that could never
     # be evaluated when the expected exception was raised — dropped here
     with pytest.raises(TaskError):
         compute_digest("/", raise_on_error=True)
     # by default errors are swallowed and None is returned
     assert compute_digest("/") is None