def test_compute_digest_for_empty_input(self):
    """Test compute_digest() for empty input."""
    # please see https://www.di-mgt.com.au/sha_testvectors.html
    # for explanation
    assert compute_digest("/dev/null") is not None
    SHA256_FOR_EMPTY_INPUT = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    assert compute_digest("/dev/null") == SHA256_FOR_EMPTY_INPUT
def compute_digests(self, cache_path, f, artifact=False):
    """Compute sha256, sha1, md5 and ssdeep digests of a single file."""
    f_digests = {
        'sha256': compute_digest(f, 'sha256'),
        'sha1': compute_digest(f, 'sha1'),
        'md5': compute_digest(f, 'md5'),
        'ssdeep': self.compute_ssdeep(f)
    }

    if artifact:
        f_digests['artifact'] = True
        f_digests['path'] = os.path.basename(f)
    else:
        f_digests['path'] = os.path.relpath(f, cache_path)

    return f_digests
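# A hedged usage sketch for compute_digests() above, in the same pytest style as the
# surrounding tests. It assumes compute_digests() is a method of DigesterTask (as
# test_execute below suggests) and that create_test_instance() works in this context;
# the file name is illustrative only.
def test_compute_digests_shape(self, tmpdir):
    """Sketch: compute_digests() returns the expected digest keys."""
    f = tmpdir.join("data.txt")
    f.write("some content")
    task = DigesterTask.create_test_instance(task_name='digests')

    digests = task.compute_digests(str(tmpdir), str(f))
    assert set(digests.keys()) == {'sha256', 'sha1', 'md5', 'ssdeep', 'path'}
    assert digests['path'] == 'data.txt'  # relative to cache_path

    artifact_digests = task.compute_digests(str(tmpdir), str(f), artifact=True)
    assert artifact_digests['artifact'] is True
    assert artifact_digests['path'] == 'data.txt'  # basename when artifact=True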
def test_execute(self, tmpdir):
    """Test DigesterTask.execute() on a fetched PyPI artifact."""
    artifact_digest, artifact_path = IndianaJones.fetch_artifact(
        Ecosystem(name='pypi', backend=EcosystemBackend.pypi),
        artifact=PYPI_MODULE_NAME, version=PYPI_MODULE_VERSION,
        target_dir=str(tmpdir))

    args = dict.fromkeys(('ecosystem', 'name', 'version'), 'some-value')
    # flexmock(EPVCache).should_receive('get_extracted_source_tarball').and_return(str(tmpdir))
    flexmock(EPVCache).should_receive('get_source_tarball').and_return(artifact_path)

    task = DigesterTask.create_test_instance(task_name='digests')
    results = task.execute(arguments=args)

    assert results is not None
    assert isinstance(results, dict)
    assert set(results.keys()) == {'details', 'status', 'summary'}

    artifact_details = None
    for details in results['details']:
        assert {'sha256', 'sha1', 'md5', 'ssdeep', 'path'}.issubset(set(details.keys()))
        if details.get('artifact'):
            artifact_details = details
    # there are artifact details
    assert artifact_details is not None
    # the artifact digest which Indy returns is the same as the one from DigesterTask
    assert artifact_digest == artifact_details['sha256'] == compute_digest(artifact_path)
    assert artifact_details['path'] == 'six-1.0.0.tar.gz'
def fetch_maven_artifact(ecosystem, name, version, target_dir):
    """Fetch maven artifact from maven.org."""
    git = Git.create_git(target_dir)
    artifact_coords = MavenCoordinates.from_str(name)
    if not version:
        raise ValueError("No version provided for '%s'" % artifact_coords.to_str())
    artifact_coords.version = version
    if not artifact_coords.is_valid():
        raise NotABugTaskError("Invalid Maven coordinates: {a}".format(
            a=artifact_coords.to_str()))

    maven_url = ecosystem.fetch_url
    artifact_url = urljoin(maven_url, artifact_coords.to_repo_url())
    local_filepath = IndianaJones.download_file(artifact_url, target_dir)
    if local_filepath is None:
        raise NotABugTaskError("Unable to download: %s" % artifact_url)

    local_filename = os.path.split(local_filepath)[1]
    artifact_path = os.path.join(target_dir, local_filename)
    digest = compute_digest(artifact_path)
    if artifact_coords.packaging != 'pom':
        Archive.extract(artifact_path, target_dir)
        if artifact_coords.packaging == 'aar':
            # 'aar' archive contains classes.jar, extract it too into target_dir
            classes_jar_path = os.path.join(target_dir, "classes.jar")
            if os.path.isfile(classes_jar_path):
                Archive.extract(classes_jar_path, target_dir)
                os.remove(classes_jar_path)

    git.add_and_commit_everything()
    return digest, artifact_path
def fetch_pypi_artifact(name, version, target_dir):
    """Fetch PyPI artifact."""
    git = Git.create_git(target_dir)
    # NOTE: we can't download Python packages via pip, because it runs setup.py
    # even with `pip download`, so we could get syntax errors from packages
    # written for an older/newer Python.
    res = requests.get('https://pypi.python.org/pypi/{n}/json'.format(n=name))
    res.raise_for_status()
    if not version:
        version = res.json()['info']['version']
    release_files = res.json().get('releases', {}).get(version, [])
    if not release_files:
        raise RuntimeError("No release files for version %s" % version)

    # sort releases by the order in which we'd like to download:
    #  1) sdist
    #  2) wheels
    #  3) eggs
    #  4) anything else (creepy stuff)
    def release_key(rel):
        return {'sdist': 0, 'bdist_wheel': 1, 'bdist_egg': 2}.get(rel['packagetype'], 3)

    release_files = sorted(release_files, key=release_key)
    file_url = release_files[0]['url']
    local_filename = IndianaJones.download_file(file_url, target_dir)
    if local_filename is None:
        raise RuntimeError("Unable to download: %s" % file_url)

    artifact_path = os.path.join(target_dir, local_filename)
    digest = compute_digest(artifact_path)
    Archive.extract(artifact_path, target_dir)

    git.add_and_commit_everything()
    return digest, artifact_path
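# Small sanity sketch of the release_key() ordering used in fetch_pypi_artifact():
# given mixed package types, the sdist sorts first. The release data below is made
# up for illustration; release_key() is re-declared here because it is local to the
# function above.
def _release_key(rel):
    return {'sdist': 0, 'bdist_wheel': 1, 'bdist_egg': 2}.get(rel['packagetype'], 3)


def test_release_key_prefers_sdist():
    """Sketch: sdist releases are downloaded in preference to wheels and eggs."""
    releases = [{'packagetype': 'bdist_wheel', 'url': 'wheel-url'},
                {'packagetype': 'bdist_egg', 'url': 'egg-url'},
                {'packagetype': 'sdist', 'url': 'sdist-url'}]
    assert sorted(releases, key=_release_key)[0]['url'] == 'sdist-url'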
def fetch_npm_artifact(name, version, target_dir):
    """Fetch npm artifact using system 'npm' tool."""
    git = Git.create_git(target_dir)

    # $ npm config get cache
    # /root/.npm
    cache_path = TimedCommand.get_command_output(['npm', 'config', 'get', 'cache'],
                                                 graceful=False).pop()

    # add package to cache:
    # /root/.npm/express/
    # └── 4.13.4
    #     ├── package
    #     │   ├── History.md
    #     │   ├── index.js
    #     │   ├── lib
    #     │   ├── LICENSE
    #     │   ├── package.json
    #     │   └── Readme.md
    #     └── package.tgz
    # 3 directories, 6 files
    name_ver = name
    if version:
        name_ver = "{}@{}".format(name, version)

    # make sure the artifact is not in the cache yet
    TimedCommand.get_command_output(['npm', 'cache', 'clean', name], graceful=False)
    logger.info("downloading npm module %s", name_ver)
    npm_command = ['npm', 'cache', 'add', name_ver]
    TimedCommand.get_command_output(npm_command, graceful=False)

    # copy tarball to workpath
    tarball_name = "package.tgz"
    glob_path = os.path.join(cache_path, name, "*")
    cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
    artifact_path = os.path.join(cache_abs_path, tarball_name)
    logger.debug("[cache] tarball path = %s", artifact_path)
    artifact_path = shutil.copy(artifact_path, target_dir)
    logger.debug("[workdir] tarball path = %s", artifact_path)

    # Prior to npm-2.x.x (Fedora 24), the npm client repackaged modules on download.
    # It modified file permissions inside package.tgz so they matched the UID/GID of
    # the user running npm. Therefore the digest was different than that of a tarball
    # downloaded directly from registry.npmjs.org.
    digest = compute_digest(artifact_path)
    Archive.extract(artifact_path, target_dir)

    # copy package/package.json over the extracted one,
    # because it contains (since npm >= 2.x.x) more information.
    npm_package_json = os.path.join(cache_abs_path, 'package', 'package.json')
    shutil.copy(npm_package_json, target_dir)
    # copy package/npm-shrinkwrap.json to target_dir
    npm_shrinkwrap_json = os.path.join(target_dir, 'package', 'npm-shrinkwrap.json')
    if os.path.isfile(npm_shrinkwrap_json):
        shutil.copy(npm_shrinkwrap_json, target_dir)

    git.add_and_commit_everything()
    return digest, artifact_path
def fetch_nuget_artifact(name, version, target_dir):
    """Fetch NuGet artifact from nuget.org."""
    git = Git.create_git(target_dir)
    nuget_url = 'https://api.nuget.org/packages/'
    file_url = '{url}{name}.{version}.nupkg'.format(url=nuget_url,
                                                    name=name.lower(),
                                                    version=version.lower())
    local_filename = IndianaJones.download_file(file_url, target_dir)
    if local_filename is None:
        raise RuntimeError("Unable to download: %s" % file_url)

    artifact_path = os.path.join(target_dir, local_filename)
    digest = compute_digest(artifact_path)
    Archive.extract(artifact_path, target_dir)

    git.add_and_commit_everything()
    return digest, artifact_path
def fetch_nuget_artifact(ecosystem, name, version, target_dir):
    """Fetch nuget artifact from nuget.org."""
    git = Git.create_git(target_dir)
    nuget_url = ecosystem.fetch_url
    file_url = '{url}{name}.{version}.nupkg'.format(url=nuget_url,
                                                    name=name.lower(),
                                                    version=version.lower())
    local_filename = IndianaJones.download_file(file_url, target_dir)
    if local_filename is None:
        raise NotABugTaskError("Unable to download: %s" % file_url)

    artifact_path = os.path.join(target_dir, local_filename)
    digest = compute_digest(artifact_path)
    Archive.extract(artifact_path, target_dir)

    git.add_and_commit_everything()
    return digest, artifact_path
def fetch_scm_artifact(name, version, target_dir):
    """Fetch go artifact from SCM via 'go get' and archive the requested revision."""
    env = dict(os.environ)
    env['GOPATH'] = target_dir
    TimedCommand.get_command_output(['go', 'get', '-d', name],
                                    timeout=300, env=env, graceful=True)
    package_dir = os.path.join(target_dir, 'src', name)
    with cwd(package_dir):
        git = Git(package_dir)
        git.reset(version, hard=True)
        artifact_filename = git.archive(version)
        artifact_path = os.path.join(package_dir, artifact_filename)
        digest = compute_digest(artifact_path)
        return digest, artifact_path
def fetch_go_artifact(name, version, target_dir):
    """Fetch go artifact using 'go get' command."""
    env = dict(os.environ)
    env['GOPATH'] = target_dir
    Git.config()
    try:
        TimedCommand.get_command_output(['go', 'get', '-d', name],
                                        timeout=300, env=env, graceful=False)
    except TaskError:
        raise NotABugTaskError('Unable to go-get {n}'.format(n=name))

    package_dir = os.path.join(target_dir, 'src', name)
    with cwd(package_dir):
        git = Git(package_dir)
        git.reset(version, hard=True)
        artifact_filename = git.archive(version)
        artifact_path = os.path.join(package_dir, artifact_filename)
        digest = compute_digest(artifact_path)
        return digest, artifact_path
def fetch_rubygems_artifact(name, version, target_dir):
    """Fetch rubygems artifact using 'gem fetch'."""
    git = Git.create_git(target_dir)
    logger.info("downloading rubygems package %s-%s", name, version)
    version_arg = []
    if version:
        version_arg = ['--version', version]
    gem_command = ['gem', 'fetch', name]
    gem_command.extend(version_arg)
    with cwd(target_dir):
        TimedCommand.get_command_output(gem_command, graceful=False)

    if not version:
        # if version is None, glob for the version that was actually downloaded
        artifact_path = os.path.abspath(
            glob.glob(os.path.join(target_dir, name + '*')).pop())
    else:
        artifact_path = os.path.join(target_dir,
                                     '{n}-{v}.gem'.format(n=name, v=version))

    digest = compute_digest(artifact_path)
    Archive.extract(artifact_path, target_dir)
    git.add_and_commit_everything()
    return digest, artifact_path
def fetch_npm_artifact(ecosystem, name, version, target_dir):
    """Fetch npm artifact using system 'npm' tool."""
    git = Git.create_git(target_dir)
    npm_cmd = ['npm', '--registry', ecosystem.fetch_url]

    # $ npm config get cache
    # /root/.npm
    cache_path = TimedCommand.get_command_output(npm_cmd + ['config', 'get', 'cache'],
                                                 graceful=False).pop()

    # add package to cache:
    # /root/.npm/express/
    # └── 4.13.4
    #     ├── package
    #     │   ├── History.md
    #     │   ├── index.js
    #     │   ├── lib
    #     │   ├── LICENSE
    #     │   ├── package.json
    #     │   └── Readme.md
    #     └── package.tgz
    # 3 directories, 6 files
    name_ver = name

    try:
        # importing here to avoid circular dependency
        from f8a_worker.solver import NpmReleasesFetcher

        version_list = NpmReleasesFetcher(ecosystem).fetch_releases(name_ver)[1]
        if version not in version_list:
            raise NotABugTaskError("Provided version is not supported '%s'" % name)
        else:
            name_ver = "{}@{}".format(name, version)
    except ValueError as e:
        raise NotABugTaskError('No versions for NPM package {p} ({e})'.format(
            p=name, e=str(e)))

    # make sure the artifact is not in the cache yet
    TimedCommand.get_command_output(npm_cmd + ['cache', 'clean', name], graceful=False)
    logger.info("downloading npm module %s", name_ver)
    cmd = npm_cmd + ['cache', 'add', name_ver]
    TimedCommand.get_command_output(cmd, graceful=False)

    # copy tarball to workpath
    tarball_name = "package.tgz"
    glob_path = os.path.join(cache_path, name, "*")
    cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
    artifact_path = os.path.join(cache_abs_path, tarball_name)
    logger.debug("[cache] tarball path = %s", artifact_path)
    artifact_path = shutil.copy(artifact_path, target_dir)
    logger.debug("[workdir] tarball path = %s", artifact_path)

    # Prior to npm-2.x.x (Fedora 24), the npm client repackaged modules on download.
    # It modified file permissions inside package.tgz so they matched the UID/GID of
    # the user running npm. Therefore the digest was different than that of a tarball
    # downloaded directly from registry.npmjs.org.
    digest = compute_digest(artifact_path)
    Archive.extract(artifact_path, target_dir)
    Archive.fix_permissions(os.path.join(cache_abs_path, 'package'))

    # copy package/package.json over the extracted one,
    # because it contains (since npm >= 2.x.x) more information.
    npm_package_json = os.path.join(cache_abs_path, 'package', 'package.json')
    shutil.copy(npm_package_json, target_dir)
    # copy package/npm-shrinkwrap.json to target_dir
    npm_shrinkwrap_json = os.path.join(target_dir, 'package', 'npm-shrinkwrap.json')
    if os.path.isfile(npm_shrinkwrap_json):
        shutil.copy(npm_shrinkwrap_json, target_dir)

    git.add_and_commit_everything()
    return digest, artifact_path
def test_compute_digest(self):
    """Test compute_digest()."""
    assert compute_digest("/etc/os-release")

    with pytest.raises(TaskError):
        assert compute_digest("/", raise_on_error=True)

    assert compute_digest("/") is None
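# compute_digest() itself is not part of this section. Below is a minimal
# hashlib-based sketch that matches the interface the tests above rely on
# (sha256 by default, optional algorithm name, None on failure, TaskError when
# raise_on_error=True); the actual f8a_worker implementation may differ.
import hashlib


def compute_digest_sketch(target, function='sha256', raise_on_error=False):
    """Return the hex digest of a file, or None (or raise TaskError) on failure."""
    try:
        hasher = hashlib.new(function)
        with open(target, 'rb') as f:
            # hash the file in chunks so large artifacts don't need to fit in memory
            for chunk in iter(lambda: f.read(65536), b''):
                hasher.update(chunk)
        return hasher.hexdigest()
    except (OSError, ValueError):
        if raise_on_error:
            raise TaskError("Unable to compute %s digest of %r" % (function, target))
        return None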