def test_from_str(self, coords, from_str, is_from_str_ok, to_str, to_str_omit_version, to_repo_url):
    from_strings = from_str if isinstance(from_str, list) else [from_str]
    for fstr in from_strings:
        if is_from_str_ok:
            assert MavenCoordinates.from_str(fstr) == coords
        else:
            with pytest.raises(ValueError):
                MavenCoordinates.from_str(fstr)
def server_create_analysis(ecosystem, package, version, api_flow=True, force=False, force_graph_sync=False):
    """Create bayesianApiFlow handling analyses for specified EPV.

    :param ecosystem: ecosystem for which the flow should be run
    :param package: package for which the flow should be run
    :param version: package version
    :param api_flow: if True run bayesianApiFlow, otherwise bayesianFlow
    :param force: force run flow even if the specified EPV exists
    :param force_graph_sync: force synchronization to graph
    :return: dispatcher ID handling flow
    """
    args = {
        'ecosystem': ecosystem,
        'name': MavenCoordinates.normalize_str(package) if ecosystem == 'maven' else package,
        'version': version,
        'force': force,
        'force_graph_sync': force_graph_sync
    }

    if api_flow:
        return server_run_flow('bayesianApiFlow', args)
    else:
        return server_run_flow('bayesianFlow', args)
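# A minimal usage sketch (not part of the source): triggering the API flow for a Maven EPV.
# The coordinates 'org.junit:junit' / '4.12' and the helper name below are illustrative only;
# normalization of the Maven package name happens inside server_create_analysis itself.
def _example_trigger_maven_analysis():
    # returns the dispatcher ID handling the scheduled bayesianApiFlow
    return server_create_analysis('maven', 'org.junit:junit', '4.12',
                                  api_flow=True, force=False, force_graph_sync=False)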
def get(self, ecosystem, package, version):
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)
    result = get_analyses_from_graph(ecosystem, package, version)
    current_app.logger.warn("%r" % result)

    if result is not None:
        # Known component for Bayesian
        return result

    if os.environ.get("INVOKE_API_WORKERS", "") == "1":
        # Enter the unknown path
        server_create_analysis(ecosystem, package, version, api_flow=True,
                               force=False, force_graph_sync=True)
        msg = "{ecosystem} Package {package}/{version} is unavailable. " \
              "The package will be available shortly, please retry after some time.".format(
                  ecosystem=ecosystem, package=package, version=version)
        raise HTTPError(202, msg)
    else:
        server_create_analysis(ecosystem, package, version, api_flow=False,
                               force=False, force_graph_sync=True)
        msg = "No data found for {ecosystem} Package {package}/{version}".format(
            ecosystem=ecosystem, package=package, version=version)
        raise HTTPError(404, msg)
def get(self, ecosystem, package):
    args = pagination_parser.parse_args()
    package = urllib.parse.unquote(package)
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)

    package_found = rdb.session.query(Package).\
        join(Ecosystem).\
        filter(Ecosystem.name == ecosystem, Package.name == package).\
        count()

    if package_found == 0:
        raise HTTPError(404, error="Package '{e}/{p}' not tracked".format(
            p=package, e=ecosystem))

    query = rdb.session.query(Version).\
        join(Package).join(Ecosystem).\
        filter(Ecosystem.name == ecosystem, Package.name == package)
    count = query.count()
    versions = query.\
        order_by(Version.identifier.asc()).\
        offset(get_item_skip(args['page'], args['per_page'])).\
        limit(get_item_relative_limit(args['page'], args['per_page']))

    items = [{
        'ecosystem': ecosystem,
        'package': package,
        'version': v.identifier
    } for v in versions]

    return {TOTAL_COUNT_KEY: count, 'items': items}
def get_latest_analysis_for(ecosystem, package, version):
    """Note: has to be called inside flask request context."""
    try:
        if ecosystem == 'maven':
            package = MavenCoordinates.normalize_str(package)
        return rdb.session.query(Analysis).\
            join(Version).join(Package).join(Ecosystem).\
            filter(Ecosystem.name == ecosystem).\
            filter(Package.name == package).\
            filter(Version.identifier == version).\
            order_by(Analysis.started_at.desc()).\
            first()
    except NoResultFound:
        return None
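# Usage sketch (illustrative only): per the docstring, the helper must run inside a Flask
# request context; 'flask_app' stands for the application object and is an assumption here,
# as are the example coordinates.
def _example_latest_analysis(flask_app):
    with flask_app.test_request_context():
        # newest Analysis row for the EPV, or None when nothing has been analysed yet
        return get_latest_analysis_for('maven', 'org.junit:junit', '4.12')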
def get_analysis_count(self, ecosystem, package):
    """Get count of previously scheduled analyses for given ecosystem-package.

    :param ecosystem: str, Ecosystem name
    :param package: str, Package name
    :return: analysis count
    """
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)

    count = PostgresBase.session.query(PackageAnalysis).\
        join(Package).join(Ecosystem).\
        filter(Ecosystem.name == ecosystem).\
        filter(Package.name == package).\
        count()

    return count
def get_analysis_by_id(self, ecosystem, package, analysis_id):
    """Get result of previously scheduled analysis for given ecosystem-package pair by analysis ID.

    :param ecosystem: str, Ecosystem name
    :param package: str, Package name
    :param analysis_id: str, ID of analysis
    :return: analysis result
    """
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)

    found = self.session.query(PackageAnalysis).\
        filter(Ecosystem.name == ecosystem).\
        filter(Package.name == package).\
        filter(PackageAnalysis.id == analysis_id).\
        one()

    return found
def get_analysis_count(self, ecosystem, package, version):
    """Get count of previously scheduled analyses for given EPV triplet.

    :param ecosystem: str, Ecosystem name
    :param package: str, Package name
    :param version: str, Package version
    :return: analysis count
    """
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)

    count = self.session.query(Analysis).\
        join(Version).join(Package).join(Ecosystem).\
        filter(Ecosystem.name == ecosystem).\
        filter(Package.name == package).\
        filter(Version.identifier == version).\
        count()

    return count
def fetch_artifact(ecosystem=None, artifact=None, version=None, target_dir='.'):
    """Download artifact from registry and process it.

    :param ecosystem: Ecosystem instance the artifact belongs to
    :param artifact: artifact (package) name
    :param version: artifact version; if None, the latest available one is used
    :param target_dir: directory to download and extract the artifact into
    :return: tuple: (digest, artifact_path)
    """
    parsed = urlparse(artifact)
    digest = None
    artifact_path = None

    if ecosystem.is_backed_by(EcosystemBackend.pypi):
        git = Git.create_git(target_dir)
        # NOTE: we can't download Python packages via pip, because it runs setup.py
        # even with `pip download`. Therefore we could always get syntax errors
        # because of older/newer syntax.
        res = requests.get('https://pypi.python.org/pypi/{a}/json'.format(a=artifact))
        res.raise_for_status()
        if not version:
            version = res.json()['info']['version']
        release_files = res.json()['releases'][version]

        # sort releases by order in which we'd like to download:
        #  1) sdist
        #  2) wheels
        #  3) eggs
        #  4) anything else (creepy stuff)
        def release_key(rel):
            return {'sdist': 0, 'bdist_wheel': 1, 'bdist_egg': 2}.get(rel['packagetype'], 3)

        release_files = list(sorted(release_files, key=release_key))
        file_url = release_files[0]['url']
        local_filename = IndianaJones.download_file(file_url, target_dir)
        artifact_path = os.path.join(target_dir, local_filename)
        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)
        git.add_and_commit_everything()
    elif ecosystem.is_backed_by(EcosystemBackend.npm):
        git = Git.create_git(target_dir)

        # $ npm config get cache
        # /root/.npm
        cache_path = TimedCommand.get_command_output(
            ['npm', 'config', 'get', 'cache'], graceful=False).pop()

        # add package to cache:
        # /root/.npm/express/
        # └── 4.13.4
        #     ├── package
        #     │   ├── History.md
        #     │   ├── index.js
        #     │   ├── lib
        #     │   ├── LICENSE
        #     │   ├── package.json
        #     │   └── Readme.md
        #     └── package.tgz
        # 3 directories, 6 files
        name_ver = artifact
        if version:
            name_ver = "{}@{}".format(artifact, version)
        # make sure the artifact is not in the cache yet
        TimedCommand.get_command_output(['npm', 'cache', 'clean', artifact], graceful=False)
        logger.info("downloading npm module %s", name_ver)
        npm_command = ['npm', 'cache', 'add', name_ver]
        TimedCommand.get_command_output(npm_command, graceful=False)

        # copy tarball to workpath
        tarball_name = "package.tgz"
        glob_path = os.path.join(cache_path, artifact, "*")
        cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
        artifact_path = os.path.join(cache_abs_path, tarball_name)
        logger.debug("[cache] tarball path = %s", artifact_path)
        artifact_path = shutil.copy(artifact_path, target_dir)
        logger.debug("[workdir] tarball path = %s", artifact_path)

        # Prior to npm-2.x.x (Fedora 24) the npm client was repackaging modules on download.
        # It modified file permissions inside package.tgz so they matched UID/GID of the user
        # running the npm command. Therefore its digest was different than that of a tarball
        # downloaded directly from registry.npmjs.org.
        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)

        # copy package/package.json over the extracted one,
        # because it contains (since npm >= 2.x.x) more information
        npm_package_json = os.path.join(cache_abs_path, 'package', 'package.json')
        shutil.copy(npm_package_json, target_dir)
        # copy package/npm-shrinkwrap.json to target_dir
        npm_shrinkwrap_json = os.path.join(target_dir, 'package', 'npm-shrinkwrap.json')
        if os.path.isfile(npm_shrinkwrap_json):
            shutil.copy(npm_shrinkwrap_json, target_dir)
        git.add_and_commit_everything()
    elif ecosystem.is_backed_by(EcosystemBackend.rubygems):
        git = Git.create_git(target_dir)
        logger.info("downloading rubygems package %s-%s", artifact, version)
        version_arg = []
        if version:
            version_arg = ['--version', version]
        gem_command = ['gem', 'fetch', artifact]
        gem_command.extend(version_arg)
        with cwd(target_dir):
            TimedCommand.get_command_output(gem_command, graceful=False)

        if not version:
            # if version is None we need to glob for the version that was downloaded
            artifact_path = os.path.abspath(
                glob.glob(os.path.join(target_dir, artifact + '*')).pop())
        else:
            artifact_path = os.path.join(
                target_dir, '{n}-{v}.gem'.format(n=artifact, v=version))

        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)
        git.add_and_commit_everything()
    elif ecosystem.is_backed_by(EcosystemBackend.maven):
        git = Git.create_git(target_dir)
        artifact_coords = MavenCoordinates.from_str(artifact)
        # lxml can't handle HTTPS URLs
        maven_url = "http://repo1.maven.org/maven2/"
        if not version:
            version = mvn_find_latest_version(maven_url, artifact_coords)
        artifact_coords.version = version
        logger.info("downloading maven package %s", artifact_coords.to_str())

        if not artifact_coords.is_valid():
            raise ValueError("Invalid Maven coordinates: {a}".format(
                a=artifact_coords.to_str()))

        artifact_url = urljoin(maven_url, artifact_coords.to_repo_url())
        local_filename = IndianaJones.download_file(artifact_url, target_dir)
        if local_filename is None:
            raise RuntimeError("Unable to download: %s" % artifact_url)
        artifact_path = os.path.join(
            target_dir, os.path.split(artifact_coords.to_repo_url())[1])
        digest = compute_digest(artifact_path)
        if artifact_coords.packaging != 'pom':
            Archive.extract(artifact_path, target_dir)
        git.add_and_commit_everything()
    elif ecosystem.is_backed_by(EcosystemBackend.scm):
        git = Git.clone(artifact, target_dir)
        digest = IndianaJones.get_revision(target_dir)
        artifact_path = git.archive(artifact)
    elif parsed:
        if parsed[0] == 'git' or parsed[2].endswith('.git'):
            git = Git.clone(artifact, target_dir)
            digest = IndianaJones.get_revision(target_dir)
            artifact_path = git.archive(artifact)

    return digest, artifact_path
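# Usage sketch (an illustrative example, not from the source): fetching a Maven artifact into a
# temporary directory. 'maven_ecosystem' stands for an Ecosystem instance obtained elsewhere
# (e.g. from the database); fetch_artifact only queries it via is_backed_by().
def _example_fetch_junit(maven_ecosystem):
    import tempfile
    target_dir = tempfile.mkdtemp(prefix='junit-fetch-')
    digest, artifact_path = fetch_artifact(ecosystem=maven_ecosystem,
                                           artifact='org.junit:junit',
                                           version='4.12',
                                           target_dir=target_dir)
    return digest, artifact_path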
def _create_analysis_arguments(ecosystem, name, version):
    return {
        'ecosystem': ecosystem,
        'name': MavenCoordinates.normalize_str(name) if ecosystem == 'maven' else name,
        'version': version
    }
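# Minimal sketch (the package names below are illustrative): only the Maven name is routed
# through MavenCoordinates.normalize_str(); other ecosystems keep the name untouched.
def _example_analysis_arguments():
    maven_args = _create_analysis_arguments('maven', 'org.junit:junit', '4.12')
    npm_args = _create_analysis_arguments('npm', 'serve-static', '1.7.1')
    return maven_args, npm_args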
s = set()  # results collected by the worker threads


def foo(x):
    s.add(x)


original = set(range(0, 10))
tp = ThreadPool(foo)
for i in original:
    tp.add_task(i)
tp.start()
tp.join()

assert s == original


example_coordinates = [
    # MavenCoordinates(), from_str, is_from_str_ok, to_str, to_str(omit_version=True), to_repo_url
    (MavenCoordinates('g', 'a'), 'g:a', True,
     'g:a', 'g:a', None),
    (MavenCoordinates('g', 'a', '1'), 'g:a:1', True,
     'g:a:1', 'g:a', 'g/a/1/a-1.jar'),
    (MavenCoordinates('g', 'a', packaging='war'), 'g:a:war:', True,
     'g:a:war:', 'g:a:war:', None),
    (MavenCoordinates('g', 'a', '1', packaging='war'), ['g:a:war:1', 'g:a:war::1'], True,
     'g:a:war:1', 'g:a:war:', 'g/a/1/a-1.war'),
    (MavenCoordinates('g', 'a', classifier='sources'), 'g:a::sources:', True,
     'g:a::sources:', 'g:a::sources:', None),
    (MavenCoordinates('g', 'a', '1', classifier='sources'), 'g:a::sources:1', True,
     'g:a::sources:1', 'g:a::sources:', 'g/a/1/a-1-sources.jar'),
    (MavenCoordinates('g', 'a', packaging='war', classifier='sources'), 'g:a:war:sources:', True,
     'g:a:war:sources:', 'g:a:war:sources:', None),
    (MavenCoordinates('g', 'a', '1', packaging='war', classifier='sources'), 'g:a:war:sources:1', True,
     'g:a:war:sources:1', 'g:a:war:sources:', 'g/a/1/a-1-sources.war'),
]
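# A hedged sketch of how example_coordinates is presumably wired up: pytest parametrization
# mapping each column onto the matching test argument. The test name and decorator placement
# are assumptions here; the column meanings come from the comment heading the table above.
@pytest.mark.parametrize(
    'coords, from_str, is_from_str_ok, to_str, to_str_omit_version, to_repo_url',
    example_coordinates)
def test_to_str_sketch(coords, from_str, is_from_str_ok, to_str, to_str_omit_version, to_repo_url):
    assert coords.to_str() == to_str
    assert coords.to_str(omit_version=True) == to_str_omit_version
    if to_repo_url is not None:
        assert coords.to_repo_url() == to_repo_url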
def test_mvn_find_latest_version(self):
    repo_url = os.path.join(os.path.dirname(__file__), 'data/maven/')
    a = MavenCoordinates('org.junit', 'junit')
    latest = mvn_find_latest_version(repo_url, a)
    assert latest == '4.12'