def test_DownloadArchive(self):
  # A package archive fetched through DownloadPackageArchives must end up in
  # the tar directory with the same hash as the file it was fetched from.
  target = 'archive_target'
  name = 'archive_name'
  url = 'http://www.fake.com/archive.tar'
  with pynacl.working_directory.TemporaryWorkingDirectory() as work_dir:
    # Store the mock file under the fake URL in the test downloader and
    # describe it in a package that points at that URL.
    source_tar = self.GenerateMockFile(work_dir)
    self._fake_downloader.StoreURL(url, source_tar)
    desc = self.GeneratePackageInfo([source_tar],
                                    url_dict={source_tar: url})

    download_root = os.path.join(work_dir, 'tar_dir')
    package_version.DownloadPackageArchives(
        download_root,
        target,
        name,
        desc,
        downloader=self._fake_downloader.Download)

    download_count = self._fake_downloader.GetDownloadCount()
    self.assertEqual(
        download_count, 1,
        "Expected a single archive to have been downloaded.")

    # Compare the downloaded copy against the original by archive hash.
    downloaded_tar = package_locations.GetLocalPackageArchiveFile(
        download_root, target, name, os.path.basename(source_tar))
    self.assertEqual(package_info.GetArchiveHash(downloaded_tar),
                     package_info.GetArchiveHash(source_tar))
def test_ArchiveHashStable(self):
  # Two separate files holding identical contents must hash identically.
  contents = 'this is a test'
  with pynacl.working_directory.TemporaryWorkingDirectory() as work_dir:
    paths = [os.path.join(work_dir, file_name)
             for file_name in ('temp1.txt', 'temp2.txt')]
    for path in paths:
      with open(path, 'wt') as out_file:
        out_file.write(contents)

    first_hash, second_hash = [package_info.GetArchiveHash(path)
                               for path in paths]
    self.assertEqual(first_hash, second_hash)
def GeneratePackageInfo(self, archive_list, name_dict=None, url_dict=None,
                        src_dir_dict=None, dir_dict=None):
  """Generates a package_info.PackageInfo object for list of archives.

  Args:
    archive_list: List of file paths where package archives sit.
    name_dict: optional dict of archive to names, otherwise use filename.
    url_dict: optional dict of archive file path to URL if url exists.
    src_dir_dict: optional dict of archive file path to source tar dir if
        exists.
    dir_dict: optional dict of archive file path to root dir if exists.

  Returns:
    A package_info.PackageInfo with one archive appended per entry of
    archive_list, in the order given.
  """
  # The optional dicts default to None instead of {} so that no mutable
  # object is shared between calls through the function signature.
  name_dict = {} if name_dict is None else name_dict
  url_dict = {} if url_dict is None else url_dict
  src_dir_dict = {} if src_dir_dict is None else src_dir_dict
  dir_dict = {} if dir_dict is None else dir_dict

  package_desc = package_info.PackageInfo()
  for archive_file in archive_list:
    archive_name = name_dict.get(archive_file, os.path.basename(archive_file))

    # Paths that are not existing files get a placeholder hash.
    if os.path.isfile(archive_file):
      archive_hash = package_info.GetArchiveHash(archive_file)
    else:
      archive_hash = 'invalid'

    package_desc.AppendArchive(
        archive_name,
        archive_hash,
        url=url_dict.get(archive_file),
        tar_src_dir=src_dir_dict.get(archive_file, ''),
        extract_dir=dir_dict.get(archive_file, ''))

  return package_desc
def test_HashEmptyForMissingFiles(self):
  # Many scripts rely on the archive hash returning None for missing files.
  with pynacl.working_directory.TemporaryWorkingDirectory() as work_dir:
    # Resolve the path inside work_dir rather than relative to the process's
    # current directory, where a stray file of the same name could make this
    # test fail.
    missing_file = os.path.join(work_dir, 'missingfile.tgz')
    self.assertIsNone(package_info.GetArchiveHash(missing_file))
def ExtractPackageTargets(package_target_packages, tar_dir, dest_dir,
                          downloader=None):
  """Extracts package targets from the tar directory to the destination.

  Each package archive within a package will be verified before being
  extracted. If a package archive does not exist or does not match the hash
  stored within the package file, it will be re-downloaded before being
  extracted.

  A package is skipped when the package file already present in the
  destination compares equal to the local package file. Otherwise any existing
  destination package directory is deleted before extraction, and the local
  package file is copied to the destination once all archives are extracted.

  Args:
    package_target_packages: List of tuples of package target and package names.
    tar_dir: Source tar directory where package archives live.
    dest_dir: Root destination directory where packages will be extracted to.
    downloader: function which takes a url and a file path for downloading.
        Defaults to gsd_storage.HttpDownload.

  Raises:
    IOError: if an archive is missing or stale and has no URL, or if the
        archive still does not match the expected hash after downloading.
  """
  if downloader is None:
    downloader = gsd_storage.HttpDownload

  for package_target, package_name in package_target_packages:
    package_file = package_locations.GetLocalPackageFile(
        tar_dir,
        package_target,
        package_name
    )
    package_desc = package_info.PackageInfo(package_file)
    dest_package_dir = package_locations.GetFullDestDir(
        dest_dir,
        package_target,
        package_name
    )
    dest_package_file = package_locations.GetDestPackageFile(
        dest_dir,
        package_target,
        package_name
    )

    # Only do the extraction if the extract packages do not match.
    if os.path.isfile(dest_package_file):
      dest_package_desc = package_info.PackageInfo(dest_package_file)
      if dest_package_desc == package_desc:
        logging.debug('Skipping extraction for package (%s)', package_name)
        continue

    if os.path.isdir(dest_package_dir):
      logging.info('Deleting old package directory: %s', dest_package_dir)
      pynacl.file_tools.RemoveDir(dest_package_dir)

    logging.info('Extracting package (%s) to directory: %s',
                 package_name, dest_package_dir)
    for archive_desc in package_desc.GetArchiveList():
      archive_file = package_locations.GetLocalPackageArchiveFile(
          tar_dir,
          package_target,
          package_name,
          archive_desc.name
      )

      # Upon extraction, some files may not be downloaded (or have stale
      # files), we need to check the hash of each file and attempt to download
      # it if they do not match.
      archive_hash = package_info.GetArchiveHash(archive_file)
      if archive_hash != archive_desc.hash:
        logging.warning('Expected archive missing, downloading: %s',
                        archive_desc.name)
        if archive_desc.url is None:
          raise IOError('Invalid archive file and URL: %s' % archive_file)

        pynacl.file_tools.MakeParentDirectoryIfAbsent(archive_file)
        downloader(archive_desc.url, archive_file)
        archive_hash = package_info.GetArchiveHash(archive_file)
        if archive_hash != archive_desc.hash:
          raise IOError('Downloaded archive file does not match hash.'
                        ' [%s] Expected %s, received %s.' %
                        (archive_file, archive_desc.hash, archive_hash))

      destination_dir = os.path.join(dest_package_dir, archive_desc.extract_dir)
      logging.info('Extracting %s to %s...', archive_desc.name, destination_dir)

      # Unpack into a scratch directory first so that only the tar_src_dir
      # subtree of the archive is merged into the destination.
      temp_dir = os.path.join(destination_dir, '.tmp')
      pynacl.file_tools.RemoveDir(temp_dir)
      os.makedirs(temp_dir)

      # tarfile.open() detects compression transparently in 'r' mode, whereas
      # instantiating TarFile directly only reads uncompressed tar files.
      with tarfile.open(archive_file, 'r') as tar:
        tar.extractall(temp_dir)

      temp_src_dir = os.path.join(temp_dir, archive_desc.tar_src_dir)
      pynacl.file_tools.MoveAndMergeDirTree(temp_src_dir, destination_dir)
      pynacl.file_tools.RemoveDir(temp_dir)

    # Copy the package file over last; the skip check above compares against
    # this destination copy.
    pynacl.file_tools.MakeParentDirectoryIfAbsent(dest_package_file)
    shutil.copy(package_file, dest_package_file)
def UploadPackage(storage, revision, tar_dir, package_target, package_name,
                  is_shared_package, annotate=False, custom_package_file=None):
  """Uploads a local package file to the supplied cloud storage object.

  By default local package files are expected to be found in the standardized
  location within the tar directory, however a custom package file may be
  specified to upload from a different location. Package archives that do not
  have their URL field set will automatically have the archives uploaded so
  that someone accessing the package file from the cloud storage will also
  have access to the package archives.

  The uploaded package file is a copy of the local one, saved next to it with
  an '.upload' suffix, in which every archive carries a URL.

  Args:
    storage: Cloud storage object which supports PutFile and GetFile.
    revision: SVN Revision number the package should be associated with.
    tar_dir: Root tar directory where archives live.
    package_target: Package target of the package to archive.
    package_name: Package name of the package to archive.
    is_shared_package: Is this package shared among all package targets?
    annotate: If True, print '@@@...@@@' annotation lines (build step and
        download links) to stdout.
    custom_package_file: File location for a custom package file.

  Returns:
    Returns remote download key for the uploaded package file.

  Raises:
    IOError: if an archive without a URL is missing locally or its hash does
        not match the hash recorded in the package file.
  """
  if annotate:
    print('@@@BUILD_STEP upload_package@@@')

  if custom_package_file is not None:
    local_package_file = custom_package_file
  else:
    local_package_file = package_locations.GetLocalPackageFile(
        tar_dir,
        package_target,
        package_name
    )

  # Upload the package file and also upload any local package archives so
  # that they are downloadable.
  package_desc = package_info.PackageInfo(local_package_file)
  upload_package_desc = package_info.PackageInfo()

  for archive_desc in package_desc.GetArchiveList():
    url = archive_desc.url
    if url is None:
      archive_file = package_locations.GetLocalPackageArchiveFile(
          tar_dir,
          package_target,
          package_name,
          archive_desc.name)
      archive_hash = package_info.GetArchiveHash(archive_file)
      if archive_hash is None:
        raise IOError('Missing Archive File: %s' % archive_file)
      elif archive_hash != archive_desc.hash:
        raise IOError(
            'Archive hash does not match package hash: %s'
            '\n Archive Hash: %s'
            '\n Package Hash: %s' %
            (archive_file, archive_hash, archive_desc.hash)
        )

      logging.warning('Missing archive URL: %s', archive_desc.name)
      logging.warning('Uploading archive to be publically available...')
      remote_archive_key = package_locations.GetRemotePackageArchiveKey(
          archive_desc.name,
          archive_desc.hash
      )
      url = storage.PutFile(
          archive_file,
          remote_archive_key,
          clobber=True
      )
      if annotate:
        print('@@@STEP_LINK@download@%s@@@' % url)

    upload_package_desc.AppendArchive(
        archive_desc.name,
        archive_desc.hash,
        url=url,
        tar_src_dir=archive_desc.tar_src_dir,
        extract_dir=archive_desc.extract_dir
    )

  upload_package_file = local_package_file + '.upload'
  pynacl.file_tools.MakeParentDirectoryIfAbsent(upload_package_file)
  upload_package_desc.SavePackageFile(upload_package_file)

  logging.info('Uploading package information: %s', package_name)
  remote_package_key = package_locations.GetRemotePackageKey(
      is_shared_package,
      revision,
      package_target,
      package_name
  )
  url = storage.PutFile(upload_package_file, remote_package_key)
  # Only emit the download link when annotations were requested, consistent
  # with the other annotation lines printed by this function.
  if annotate:
    print('@@@STEP_LINK@download@%s@@@' % url)

  return remote_package_key
def ArchivePackageArchives(tar_dir, package_target, package_name, archives):
  """Archives local package archives to the tar directory.

  Each entry of archives has the form
      path[,tar_src_dir[:extract_dir]][@url]
  where the optional '@url' suffix records the archive URL, and the optional
  ',tar_src_dir:extract_dir' part records the directory inside the tar to
  extract from and the directory to extract into.

  Args:
    tar_dir: Root tar directory where archives live.
    package_target: Package target of the package to archive.
    package_name: Package name of the package to archive.
    archives: List of archive file paths where archives currently live.

  Returns:
    Returns the local package file that was archived. The same path is
    returned when the existing local package file already matches and nothing
    had to be copied.

  Raises:
    IOError: if no hash can be computed for one of the archive paths.
  """
  local_package_file = package_locations.GetLocalPackageFile(
      tar_dir,
      package_target,
      package_name
  )

  archive_list = []

  package_desc = package_info.PackageInfo()
  for archive in archives:
    archive_url = None
    if '@' in archive:
      archive, archive_url = archive.split('@', 1)

    extract_param = ''
    tar_src_dir = ''
    extract_dir = ''
    if ',' in archive:
      archive, extract_param = archive.split(',', 1)
      if ':' in extract_param:
        tar_src_dir, extract_dir = extract_param.split(':', 1)
      else:
        tar_src_dir = extract_param

    archive_hash = package_info.GetArchiveHash(archive)
    if archive_hash is None:
      raise IOError('Invalid package: %s.' % archive)

    archive_name = os.path.basename(archive)

    archive_list.append(archive)
    package_desc.AppendArchive(
        archive_name,
        archive_hash,
        url=archive_url,
        tar_src_dir=tar_src_dir,
        extract_dir=extract_dir
    )

  # We do not need to archive the package if it already matches. But if the
  # local package file is invalid or does not match, then we should recreate
  # the json file.
  if os.path.isfile(local_package_file):
    try:
      current_package_desc = package_info.PackageInfo(local_package_file)
      if current_package_desc == package_desc:
        # Return the path here as well, so that callers always receive the
        # local package file as documented rather than None.
        return local_package_file
    except ValueError:
      # An unreadable package file is simply rewritten below.
      pass

  # Copy each of the packages over to the tar directory first.
  for archive_file in archive_list:
    archive_name = os.path.basename(archive_file)
    local_archive_file = package_locations.GetLocalPackageArchiveFile(
        tar_dir,
        package_target,
        package_name,
        archive_name
    )

    logging.info('Archiving file: %s', archive_file)
    pynacl.file_tools.MakeParentDirectoryIfAbsent(local_archive_file)
    shutil.copyfile(archive_file, local_archive_file)

  # Once all the copying is completed, update the local packages file.
  logging.info('Package "%s" archived: %s', package_name, local_package_file)
  pynacl.file_tools.MakeParentDirectoryIfAbsent(local_package_file)
  package_desc.SavePackageFile(local_package_file)

  return local_package_file
def DownloadPackageArchives(tar_dir, package_target, package_name,
                            package_desc, downloader=None):
  """Fetches the archives listed in a package description into the tar dir.

  Archives recorded in the existing local package file whose on-disk hash
  already matches the description are not fetched again. Archives recorded
  there that are present on disk but no longer listed in the description are
  deleted.

  Args:
    tar_dir: Root tar directory where archives will be downloaded to.
    package_target: Package target of the package to download.
    package_name: Package name of the package to download.
    package_desc: package_info object of the package to download.
    downloader: function which takes a url and a file path for downloading.
        Defaults to gsd_storage.HttpDownload.

  Returns:
    The list of files that were downloaded.

  Raises:
    IOError: if an archive that must be fetched has no URL, or if a fetched
        archive does not match the hash in the description.
  """
  if downloader is None:
    downloader = gsd_storage.HttpDownload

  def _LocalArchivePath(archive_name):
    # Location of the named archive of this package inside the tar directory.
    return package_locations.GetLocalPackageArchiveFile(
        tar_dir, package_target, package_name, archive_name)

  package_file = package_locations.GetLocalPackageFile(
      tar_dir, package_target, package_name)

  # Map each archive named by the existing package file to the hash of its
  # file on disk, skipping archives for which no hash is available. This
  # lets already-present archives be reused instead of downloaded again.
  existing_hashes = {}
  if os.path.isfile(package_file):
    previous_desc = package_info.PackageInfo(package_file)
    for previous_archive in previous_desc.GetArchiveList():
      disk_hash = package_info.GetArchiveHash(
          _LocalArchivePath(previous_archive.name))
      if disk_hash is not None:
        existing_hashes[previous_archive.name] = disk_hash

  # Fetch every archive of the new description that is not already present
  # with the right hash. Names still wanted are dropped from existing_hashes,
  # so whatever is left afterwards is stale.
  fetched = []
  for wanted in package_desc.GetArchiveList():
    known_hash = existing_hashes.pop(wanted.name, None)
    if known_hash is not None and wanted.hash == known_hash:
      logging.debug('Skipping matching archive: %s', wanted.name)
      continue

    target_path = _LocalArchivePath(wanted.name)
    pynacl.file_tools.MakeParentDirectoryIfAbsent(target_path)

    if wanted.url is None:
      raise IOError('Error, no URL for archive: %s' % wanted.name)

    logging.info('Downloading package archive: %s', wanted.name)
    downloader(wanted.url, target_path)

    fetched_hash = package_info.GetArchiveHash(target_path)
    if fetched_hash != wanted.hash:
      raise IOError('Package hash check failed: %s != %s' %
                    (fetched_hash, wanted.hash))

    fetched.append(target_path)

  # Delete any stale left over packages.
  for stale_name in existing_hashes:
    os.unlink(_LocalArchivePath(stale_name))

  return fetched