def download_data(github_user, github_repo, cache_dir):
    """
    Download the github url tarball to the cache directory

    The ``master`` tarball of ``github_user/github_repo`` is fetched from
    ``codeload.github.com`` into ``cache_dir``, unpacked there with the
    archive's first member (taken to be the top-level directory) stripped
    from every path, and the tarball is deleted afterwards -- also when
    the download, validation or extraction fails.

    Parameters
    ----------
    github_user : str
        the name of the GitHub account owning the repository
    github_repo : str
        the name of the repository to download
    cache_dir : str
        the directory to extract into; created if it does not exist

    Raises
    ------
    ValueError
        if the downloaded file cannot be opened as a tarball, if it has
        no members, or if a member would be written outside ``cache_dir``
    """
    import tarfile

    # make the cache dir if it doesnt exist; attempting the creation and
    # inspecting the failure avoids the race between an exists() check
    # and makedirs() when several processes share the cache
    try:
        os.makedirs(cache_dir)
    except OSError:
        if not os.path.isdir(cache_dir):
            raise

    tarball_link = "https://codeload.github.com/%s/%s/legacy.tar.gz/master" %(github_user, github_repo)
    tarball_local = os.path.join(cache_dir, 'master.tar.gz')

    # everything that touches the tarball lives in the try block so that
    # the temporary file never outlives this call
    try:
        # download the tarball locally
        request.urlretrieve(tarball_link, tarball_local)

        if not tarfile.is_tarfile(tarball_local):
            dir_exists = os.path.exists(os.path.dirname(tarball_local))
            args = (tarball_local, str(dir_exists))
            raise ValueError("downloaded tarball '%s' cannot be opened as a tar.gz file (directory exists: %s)" %args)

        # resolved extraction root, used to reject members escaping it
        cache_root = os.path.realpath(cache_dir)
        cache_prefix = os.path.join(cache_root, '')  # guarantees a trailing separator

        # extract the tarball to the cache dir
        with tarfile.open(tarball_local, 'r:*') as tar:
            members = tar.getmembers()
            if not members:
                raise ValueError("downloaded tarball '%s' contains no members" % tarball_local)

            # the first member is the top-level directory; strip it from
            # the path of every other member
            topdir = members[0].name
            for m in members[1:]:
                name = os.path.relpath(m.name, topdir)

                # the archive comes from the network: refuse names (or
                # previously extracted symlinks) that resolve outside
                # of the cache directory
                target = os.path.realpath(os.path.join(cache_root, name))
                if target != cache_root and not target.startswith(cache_prefix):
                    raise ValueError("tarball member '%s' would be extracted outside of '%s'" % (m.name, cache_dir))

                m.name = name
                tar.extract(m, path=cache_dir)
    finally:
        # remove the downloaded tarball file
        if os.path.exists(tarball_local):
            os.remove(tarball_local)
def download_results_file(filename, localdir, github_url='https://github.com/bccp/nbodykit-data'):
    """
    Download a specific results file from the github repo

    The file is fetched from ``<github_url>/raw/master/results/<filename>``
    and stored as ``<localdir>/<filename>``. Nothing is downloaded when
    that local file already exists.

    Parameters
    ----------
    filename : str
        the name of the results file
    localdir : str
        the directory to store the file in; created if it does not exist
    github_url : str, optional
        the base URL of the repository holding the ``results`` directory
    """
    local_path = os.path.join(localdir, filename)

    # attempting the creation and inspecting the failure avoids the race
    # between an exists() check and makedirs() with concurrent callers
    try:
        os.makedirs(localdir)
    except OSError:
        if not os.path.isdir(localdir):
            raise

    # already downloaded
    if os.path.exists(local_path):
        return

    # URLs always use forward slashes; os.path.join would insert
    # backslashes on Windows and produce an invalid address
    remote_path = '/'.join([github_url.rstrip('/'), 'raw', 'master', 'results', filename])

    # download to a temporary name and rename on success, so that an
    # interrupted or truncated transfer is never mistaken for a valid
    # cached file by a later call
    partial_path = local_path + '.part'
    try:
        request.urlretrieve(remote_path, partial_path)
        os.rename(partial_path, local_path)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)