def get_response(url, headers=None, sleep_time=2, retry_count=10):
    """Wrap requests which tries to get response.

    :param url: URL where to do the request
    :param headers: additional headers for request
    :param sleep_time: sleep time between retries
    :param retry_count: number of retries
    :return: content of response's json
    """
    try:
        attempts_left = retry_count
        while attempts_left > 0:
            attempts_left -= 1
            response = requests.get(url, headers=headers)
            response.raise_for_status()
            if response.status_code == 204:
                # json() below would otherwise fail with JSONDecodeError
                raise HTTPError('No content')
            payload = response.json()
            if payload:
                return payload
            # empty payload -> wait and try again
            time.sleep(sleep_time)
        # all attempts produced an empty payload
        raise NotABugTaskError("Number of retries exceeded")
    except HTTPError as err:
        message = "Failed to get results from {url} with {err}".format(url=url, err=err)
        logger.error(message)
        raise NotABugTaskError(message) from err
def fetch_maven_artifact(ecosystem, name, version, target_dir):
    """Fetch maven artifact from maven.org."""
    repo = Git.create_git(target_dir)
    coords = MavenCoordinates.from_str(name)
    if not version:
        raise ValueError("No version provided for '%s'" % coords.to_str())
    coords.version = version
    if not coords.is_valid():
        raise NotABugTaskError("Invalid Maven coordinates: {a}".format(
            a=coords.to_str()))

    artifact_url = urljoin(ecosystem.fetch_url, coords.to_repo_url())
    downloaded_path = IndianaJones.download_file(artifact_url, target_dir)
    if downloaded_path is None:
        raise NotABugTaskError("Unable to download: %s" % artifact_url)

    artifact_path = os.path.join(target_dir, os.path.basename(downloaded_path))
    digest = compute_digest(artifact_path)
    if coords.packaging != 'pom':
        Archive.extract(artifact_path, target_dir)
        if coords.packaging == 'aar':
            # 'aar' archive contains classes.jar, extract it too into target_dir
            classes_jar = os.path.join(target_dir, "classes.jar")
            if os.path.isfile(classes_jar):
                Archive.extract(classes_jar, target_dir)
                os.remove(classes_jar)

    repo.add_and_commit_everything()
    return digest, artifact_path
def fetch_pypi_artifact(ecosystem, name, version, target_dir):
    """Fetch Pypi artifact."""
    repo = Git.create_git(target_dir)
    pypi_url = ecosystem.fetch_url
    # NOTE: we can't download Python packages via pip, because it runs setup.py
    # even with `pip download`. Therefore we could always get syntax errors
    # because of older/newer syntax.
    res = requests.get(urljoin(pypi_url, '{n}/json'.format(n=name)))
    if res.status_code != 200:
        raise NotABugTaskError(
            "Unable to fetch information about {n} from PyPI (status code={s})"
            .format(n=name, s=res.status_code))
    metadata = res.json()
    if not version:
        version = metadata['info']['version']
    release_files = metadata.get('releases', {}).get(version, [])
    if not release_files:
        raise NotABugTaskError("No release files for version %s" % version)

    # pick the release in the order we'd like to download:
    # 1) sdist
    # 2) wheels
    # 3) eggs
    # 4) anything else (creepy stuff)
    preference = {
        'sdist': 0,
        'bdist_wheel': 1,
        'bdist_egg': 2
    }
    best_release = min(release_files,
                       key=lambda rel: preference.get(rel['packagetype'], 3))
    file_url = best_release['url']

    local_filename = IndianaJones.download_file(file_url, target_dir)
    if local_filename is None:
        raise NotABugTaskError("Unable to download: %s" % file_url)
    artifact_path = os.path.join(target_dir, local_filename)
    digest = compute_digest(artifact_path)
    Archive.extract(artifact_path, target_dir)

    repo.add_and_commit_everything()
    return digest, artifact_path
def _resolve_dependency(ecosystem, dep):
    ret = {
        'ecosystem': ecosystem.name,
        'declaration': dep,
        'resolved_at': json_serial(datetime.datetime.utcnow())
    }

    # first, if this is a Github dependency, return it right away (we don't resolve these yet)
    if ' ' in dep:
        # we have both package name and version (version can be an URL)
        name, spec = dep.split(' ', 1)
        if gh_dep.match(spec):
            ret['name'] = name
            ret['version'] = 'https://github.com/' + spec
        elif urllib.parse.urlparse(spec).scheme != '':
            ret['name'] = name
            ret['version'] = spec
    elif gh_dep.match(dep):
        ret['name'] = 'https://github.com/' + dep
        ret['version'] = None
    elif urllib.parse.urlparse(dep).scheme != '':
        ret['name'] = dep
        ret['version'] = None

    if 'name' in ret:
        return ret

    # second, figure out what is the latest upstream version matching the spec and return it
    solver = get_ecosystem_solver(ecosystem)
    try:
        pkgspec = solver.solve([dep])
    except ValueError:
        raise NotABugTaskError("invalid dependency: {}".format(dep))

    package, version = pkgspec.popitem()
    if not version:
        raise NotABugTaskError("could not resolve {}".format(dep))

    ret['name'] = package
    ret['version'] = version
    return ret
def fetch_nuget_artifact(ecosystem, name, version, target_dir):
    """Fetch nuget artifact from nuget.org."""
    repo = Git.create_git(target_dir)
    # nuget package URLs are all-lowercase: <base>/<name>.<version>.nupkg
    file_url = '{url}{name}.{version}.nupkg'.format(url=ecosystem.fetch_url,
                                                   name=name.lower(),
                                                   version=version.lower())
    local_filename = IndianaJones.download_file(file_url, target_dir)
    if local_filename is None:
        raise NotABugTaskError("Unable to download: %s" % file_url)

    artifact_path = os.path.join(target_dir, local_filename)
    digest = compute_digest(artifact_path)
    Archive.extract(artifact_path, target_dir)

    repo.add_and_commit_everything()
    return digest, artifact_path
def _get_log(url):
    """Clone Git repo and get its log.

    :param url: url to the git repo
    """
    with TemporaryDirectory() as workdir:
        try:
            # nice notebook to check at:
            # http://nbviewer.jupyter.org/github/tarmstrong/code-analysis/blob/master/IPythonReviewTime.ipynb
            cloned = Git.clone(url, workdir)
            history = cloned.log()
        except TaskError as e:
            raise NotABugTaskError(e)
        return history
def get_gh_contributors(url):
    """Get number of contributors from Git URL.

    :param url: URL where to do the request
    :return: length of contributor's list
    """
    try:
        response = requests.get("{}?per_page=1".format(url), headers=get_header())
        response.raise_for_status()
        if not response.links:
            # no pagination links -> single page with a single contributor
            return 1
        # number of contributors == page number of the last 1-per-page page
        last_url = response.links['last']['url']
        return int(parse_qs(last_url)['page'][0])
    except HTTPError as err:
        raise NotABugTaskError(err) from err
def get_response(url):
    """Wrap requests which tries to get response.

    Single-shot variant (no retries); the JSON payload is returned as-is.

    :param url: URL where to do the request
    :return: content of response's json
    :raises NotABugTaskError: when the request fails with an HTTP error
    """
    # NOTE: the original docstring documented `sleep_time`/`retry_count`
    # parameters copied from the retrying variant; this overload has neither.
    try:
        response = requests.get(url, headers=get_header())
        response.raise_for_status()
        return response.json()
    except HTTPError as err:
        message = "Failed to get results from {url} with {err}".format(url=url, err=err)
        logger.error(message)
        raise NotABugTaskError(message) from err
def fetch_go_artifact(name, version, target_dir):
    """Fetch go artifact using 'go get' command.

    :param name: go package import path to fetch
    :param version: revision to check out (passed to git reset/archive)
    :param target_dir: directory used as GOPATH for the download
    :return: tuple (digest, artifact_path)
    :raises NotABugTaskError: when 'go get' fails
    """
    env = dict(os.environ)
    env['GOPATH'] = target_dir
    Git.config()
    try:
        TimedCommand.get_command_output(['go', 'get', '-d', name],
                                        timeout=300, env=env, graceful=False)
    except TaskError as e:
        # chain the original failure so the root cause is preserved,
        # consistent with the `raise ... from err` style used by the
        # other fetch/get helpers in this module
        raise NotABugTaskError('Unable to go-get {n}'.format(n=name)) from e

    package_dir = os.path.join(target_dir, 'src', name)
    with cwd(package_dir):
        git = Git(package_dir)
        # pin the working tree to the requested revision before archiving
        git.reset(version, hard=True)
        artifact_filename = git.archive(version)
        artifact_path = os.path.join(package_dir, artifact_filename)
        digest = compute_digest(artifact_path)
        return digest, artifact_path
def fetch_npm_artifact(ecosystem, name, version, target_dir):
    """Fetch npm artifact using system 'npm' tool."""
    # Returns (digest, artifact_path); commits everything into a fresh git
    # repo in target_dir, like the other fetch_* helpers in this module.
    git = Git.create_git(target_dir)
    npm_cmd = ['npm', '--registry', ecosystem.fetch_url]

    # $ npm config get cache
    # /root/.npm
    cache_path = TimedCommand.get_command_output(
        npm_cmd + ['config', 'get', 'cache'], graceful=False).pop()

    # add package to cache:
    # /root/.npm/express/
    # └── 4.13.4
    #     ├── package
    #     │   ├── History.md
    #     │   ├── index.js
    #     │   ├── lib
    #     │   ├── LICENSE
    #     │   ├── package.json
    #     │   └── Readme.md
    #     └── package.tgz
    # 3 directories, 6 files
    name_ver = name
    try:
        # importing here to avoid circular dependency
        from f8a_worker.solver import NpmReleasesFetcher
        # validate the requested version against the registry's release list
        version_list = NpmReleasesFetcher(ecosystem).fetch_releases(
            name_ver)[1]
        if version not in version_list:
            raise NotABugTaskError(
                "Provided version is not supported '%s'" % name)
        else:
            name_ver = "{}@{}".format(name, version)
    except ValueError as e:
        raise NotABugTaskError(
            'No versions for package NPM package {p} ({e})'.format(
                p=name, e=str(e)))

    # make sure the artifact is not in the cache yet
    TimedCommand.get_command_output(npm_cmd + ['cache', 'clean', name],
                                    graceful=False)
    logger.info("downloading npm module %s", name_ver)
    cmd = npm_cmd + ['cache', 'add', name_ver]
    TimedCommand.get_command_output(cmd, graceful=False)

    # copy tarball to workpath
    tarball_name = "package.tgz"
    # NOTE(review): glob().pop() assumes exactly one version dir exists in
    # the cache after the 'cache clean'/'cache add' sequence above — TODO confirm
    glob_path = os.path.join(cache_path, name, "*")
    cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
    artifact_path = os.path.join(cache_abs_path, tarball_name)
    logger.debug("[cache] tarball path = %s", artifact_path)
    artifact_path = shutil.copy(artifact_path, target_dir)
    logger.debug("[workdir] tarball path = %s", artifact_path)

    # Prior to npm-2.x.x (Fedora 24)
    # npm client was repackaging modules on download. It modified file permissions inside
    # package.tgz so they matched UID/GID of a user running npm command. Therefore its
    # digest was different then of a tarball downloaded directly from registry.npmjs.org.
    digest = compute_digest(artifact_path)
    Archive.extract(artifact_path, target_dir)
    Archive.fix_permissions(os.path.join(cache_abs_path, 'package'))

    # copy package/package.json over the extracted one,
    # because it contains (since npm >= 2.x.x) more information.
    npm_package_json = os.path.join(cache_abs_path, 'package', 'package.json')
    shutil.copy(npm_package_json, target_dir)
    # copy package/npm-shrinkwrap.json to target_dir
    # (read from the extracted tree under target_dir, not from the npm cache)
    npm_shrinkwrap_json = os.path.join(target_dir, 'package',
                                       'npm-shrinkwrap.json')
    if os.path.isfile(npm_shrinkwrap_json):
        shutil.copy(npm_shrinkwrap_json, target_dir)
    git.add_and_commit_everything()
    return digest, artifact_path