def compute_digests(self, cache_path, f, artifact=False):
    """
    Collect the digests of a single file into a dictionary.

    :param cache_path: directory that non-artifact paths are made relative to
    :param f: path of the file to digest
    :param artifact: when true, the entry is flagged with ``'artifact': True``
                     and only the base name of ``f`` is stored as its path
    :return: dict with 'sha256', 'sha1', 'md5', 'ssdeep' and 'path' keys
             (plus 'artifact' for artifacts)
    """
    # Hashes are computed in the order sha256, sha1, md5, followed by ssdeep.
    digests = {algorithm: compute_digest(f, algorithm)
               for algorithm in ('sha256', 'sha1', 'md5')}
    digests['ssdeep'] = self.compute_ssdeep(f)

    if artifact:
        digests['artifact'] = True
        digests['path'] = os.path.basename(f)
        return digests

    digests['path'] = os.path.relpath(f, cache_path)
    return digests
def test_execute(self, tmpdir):
    """
    Run the 'digests' task against a fetched PyPI artifact and check that
    every reported entry carries all digest keys and that the entry flagged
    as the artifact matches the digest returned by the fetcher.
    """
    workdir = str(tmpdir)
    pypi = Ecosystem(name='pypi', backend=EcosystemBackend.pypi)
    artifact_digest, artifact_path = IndianaJones.fetch_artifact(
        pypi,
        artifact=PYPI_MODULE_NAME,
        version=PYPI_MODULE_VERSION,
        target_dir=workdir)

    args = {key: 'some-value' for key in ('ecosystem', 'name', 'version')}
    # the task reads its inputs from the cache; point both lookups at our workdir
    flexmock(EPVCache).should_receive('get_extracted_source_tarball').and_return(workdir)
    flexmock(EPVCache).should_receive('get_source_tarball').and_return(artifact_path)

    task = DigesterTask.create_test_instance(task_name='digests')
    results = task.execute(arguments=args)

    assert results is not None
    assert isinstance(results, dict)
    assert set(results) == {'details', 'status', 'summary'}

    required_keys = {'sha256', 'sha1', 'md5', 'ssdeep', 'path'}
    artifact_details = None
    for entry in results['details']:
        assert required_keys <= set(entry.keys())
        if entry.get('artifact'):
            artifact_details = entry

    # there are artifact details
    assert artifact_details is not None
    # the artifact digest which Indy returns is the same as the one from DigesterTask
    expected_digest = artifact_details['sha256']
    assert artifact_digest == expected_digest == compute_digest(artifact_path)
    assert artifact_details['path'] == 'six-1.0.0.tar.gz'
def fetch_artifact(ecosystem=None, artifact=None, version=None, target_dir='.'):
    """
    Download an artifact from its registry and process it.

    For the pypi, npm, rubygems and maven backends a git repository is created
    in ``target_dir``, the artifact is downloaded there, its digest is computed,
    the archive is extracted (maven 'pom' packaging is not extracted) and
    everything is committed. For the scm backend, or when ``artifact`` looks
    like a git URL, the repository is cloned into ``target_dir`` instead.

    :param ecosystem: object answering ``is_backed_by(EcosystemBackend.<x>)``;
                      selects which registry client is used
    :param artifact: package name (pypi, npm, rubygems), Maven coordinates
                     string (maven) or repository URL (scm / git URL)
    :param version: version to fetch; when falsy, pypi uses ``info.version``
                    from the registry JSON, maven asks
                    ``mvn_find_latest_version`` and npm/gem are invoked
                    without an explicit version
    :param target_dir: directory to download and extract the artifact into
    :return: tuple: (digest, artifact_path); for git clones ``digest`` is the
             value of ``IndianaJones.get_revision(target_dir)``; both items
             are None when no backend matched and ``artifact`` is not a git URL
    :raises RuntimeError: when the download fails (pypi, maven) or the
                          requested pypi release has no files
    :raises ValueError: when the Maven coordinates are not valid
    """
    digest = None
    artifact_path = None

    if ecosystem.is_backed_by(EcosystemBackend.pypi):
        git = Git.create_git(target_dir)
        # NOTE: we can't download Python packages via pip, because it runs setup.py
        #  even with `pip download`. Therefore we could always get syntax errors
        #  because of older/newer syntax.
        res = requests.get('https://pypi.python.org/pypi/{a}/json'.format(a=artifact))
        res.raise_for_status()
        # decode the response body only once and reuse the result
        metadata = res.json()
        if not version:
            version = metadata['info']['version']
        release_files = metadata['releases'][version]
        if not release_files:
            raise RuntimeError("No release files found for {a} {v}".format(
                a=artifact, v=version))

        # pick the file by the order in which we'd like to download:
        #  1) sdist
        #  2) wheels
        #  3) eggs
        #  4) anything else (creepy stuff)
        def release_key(rel):
            return {'sdist': 0, 'bdist_wheel': 1, 'bdist_egg': 2}.get(rel['packagetype'], 3)

        # min() returns the first minimal item, i.e. the same file that
        # sorted(release_files, key=release_key)[0] would select
        file_url = min(release_files, key=release_key)['url']
        local_filename = IndianaJones.download_file(file_url, target_dir)
        # same failure handling as in the maven branch below
        if local_filename is None:
            raise RuntimeError("Unable to download: %s" % file_url)
        artifact_path = os.path.join(target_dir, local_filename)
        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)
        git.add_and_commit_everything()
    elif ecosystem.is_backed_by(EcosystemBackend.npm):
        git = Git.create_git(target_dir)

        # $ npm config get cache
        # /root/.npm
        cache_path = TimedCommand.get_command_output(['npm', 'config', 'get', 'cache'],
                                                     graceful=False).pop()

        # add package to cache:
        # /root/.npm/express/
        # └── 4.13.4
        #     ├── package
        #     │   ├── History.md
        #     │   ├── index.js
        #     │   ├── lib
        #     │   ├── LICENSE
        #     │   ├── package.json
        #     │   └── Readme.md
        #     └── package.tgz
        # 3 directories, 6 files
        name_ver = artifact
        if version:
            name_ver = "{}@{}".format(artifact, version)
        # make sure the artifact is not in the cache yet
        TimedCommand.get_command_output(['npm', 'cache', 'clean', artifact], graceful=False)
        logger.info("downloading npm module %s", name_ver)
        npm_command = ['npm', 'cache', 'add', name_ver]
        TimedCommand.get_command_output(npm_command, graceful=False)

        # copy tarball to workpath
        tarball_name = "package.tgz"
        glob_path = os.path.join(cache_path, artifact, "*")
        cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
        artifact_path = os.path.join(cache_abs_path, tarball_name)
        logger.debug("[cache] tarball path = %s", artifact_path)
        artifact_path = shutil.copy(artifact_path, target_dir)
        logger.debug("[workdir] tarball path = %s", artifact_path)

        # Prior to npm-2.x.x (Fedora 24)
        # npm client was repackaging modules on download. It modified file permissions inside
        # package.tgz so they matched UID/GID of a user running npm command. Therefore its
        # digest was different than that of a tarball downloaded directly from
        # registry.npmjs.org.
        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)

        # copy package/package.json over the extracted one,
        # because it contains (since npm >= 2.x.x) more information.
        npm_package_json = os.path.join(cache_abs_path, 'package', 'package.json')
        shutil.copy(npm_package_json, target_dir)
        # copy package/npm-shrinkwrap.json to target_dir
        npm_shrinkwrap_json = os.path.join(target_dir, 'package', 'npm-shrinkwrap.json')
        if os.path.isfile(npm_shrinkwrap_json):
            shutil.copy(npm_shrinkwrap_json, target_dir)
        git.add_and_commit_everything()
    elif ecosystem.is_backed_by(EcosystemBackend.rubygems):
        git = Git.create_git(target_dir)

        logger.info("downloading rubygems package %s-%s", artifact, version)
        version_arg = []
        if version:
            version_arg = ['--version', version]
        gem_command = ['gem', 'fetch', artifact]
        gem_command.extend(version_arg)
        # `gem fetch` is run from within target_dir
        with cwd(target_dir):
            TimedCommand.get_command_output(gem_command, graceful=False)

        if not version:
            # if version is None we need to glob for the version that was downloaded
            artifact_path = os.path.abspath(
                glob.glob(os.path.join(target_dir, artifact + '*')).pop())
        else:
            artifact_path = os.path.join(
                target_dir, '{n}-{v}.gem'.format(n=artifact, v=version))

        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)
        git.add_and_commit_everything()
    elif ecosystem.is_backed_by(EcosystemBackend.maven):
        git = Git.create_git(target_dir)
        artifact_coords = MavenCoordinates.from_str(artifact)
        # lxml can't handle HTTPS URLs
        # NOTE(review): Maven Central is believed to require HTTPS since 2020 -
        # confirm that this plain-HTTP base URL still works for both calls below.
        maven_url = "http://repo1.maven.org/maven2/"
        if not version:
            version = mvn_find_latest_version(maven_url, artifact_coords)
        artifact_coords.version = version
        logger.info("downloading maven package %s", artifact_coords.to_str())

        if not artifact_coords.is_valid():
            raise ValueError("Invalid Maven coordinates: {a}".format(
                a=artifact_coords.to_str()))

        artifact_url = urljoin(maven_url, artifact_coords.to_repo_url())
        local_filename = IndianaJones.download_file(artifact_url, target_dir)
        if local_filename is None:
            raise RuntimeError("Unable to download: %s" % artifact_url)
        # the local path is derived from the last component of the repo URL
        artifact_path = os.path.join(
            target_dir, os.path.split(artifact_coords.to_repo_url())[1])
        digest = compute_digest(artifact_path)
        if artifact_coords.packaging != 'pom':
            Archive.extract(artifact_path, target_dir)
        git.add_and_commit_everything()
    elif ecosystem.is_backed_by(EcosystemBackend.scm):
        git = Git.clone(artifact, target_dir)
        digest = IndianaJones.get_revision(target_dir)
        artifact_path = git.archive(artifact)
    else:
        # no known backend: fall back to cloning if the artifact looks like a git URL
        parsed = urlparse(artifact)
        if parsed.scheme == 'git' or parsed.path.endswith('.git'):
            git = Git.clone(artifact, target_dir)
            digest = IndianaJones.get_revision(target_dir)
            artifact_path = git.archive(artifact)

    return digest, artifact_path
def test_compute_digest(self):
    """
    Check compute_digest on `/etc/os-release` (truthy result) and on the
    directory `/` (TaskError with raise_on_error=True, None otherwise).
    """
    os_release = "/etc/os-release"
    root_dir = "/"

    assert compute_digest(os_release)

    with pytest.raises(TaskError):
        assert compute_digest(root_dir, raise_on_error=True)

    assert compute_digest(root_dir) is None