def _fingerprint_files_in_tar(file_list, tar_location):
    """Return a hex SHA1 digest over the paths and contents of `file_list` entries in the tar.

    Each entry's path is hashed before its content so that renames change the
    fingerprint even when content is identical.

    :param file_list: object whose `.dependencies` yields entries with a `.path` attribute.
    :param tar_location: filesystem path of the tar archive to read from.
    :returns: hex digest string.
    """
    digest = sha1()
    with open_tar(tar_location, mode='r', errorlevel=1) as tar:
        # NOTE(review): sha1().update requires bytes on Python 3 — presumably
        # `entry.path` is bytes here (or this runs under Python 2); confirm.
        for entry in file_list.dependencies:
            digest.update(entry.path)
            digest.update(tar.extractfile(entry.path).read())
    return digest.hexdigest()
def capture(self, run_info_dict):
    """Archive the buildroot (minus ignored entries) plus a generated `repro.sh` into self._path.

    :param run_info_dict: mapping of run-info keys to values, recorded as comments
        at the top of the generated repro.sh.
    """
    # Force the scm discovery logging messages to appear before ours, so the startup delay
    # is properly associated in the user's mind with us and not with scm.
    logger.info(f'Capturing repro information to {self._path}')
    with open_tar(self._path, 'w:gz', dereference=True, compresslevel=6) as tarout:
        for relpath in os.listdir(self._buildroot):
            if relpath in self._ignore:
                continue
            tarout.add(os.path.join(self._buildroot, relpath), relpath)
        with temporary_file(binary_mode=False) as tmpfile:
            tmpfile.write('# Pants repro captured for the following build:\n')
            for k, v in sorted(run_info_dict.items()):
                tmpfile.write(f'# {k}: {v}\n')
            cmd_line = list(sys.argv)
            # Use 'pants' instead of whatever the full executable path was on the user's system.
            cmd_line[0] = 'pants'
            # Remove any repro-related flags. The repro-ing user won't want to call those.
            cmd_line = [arg for arg in cmd_line if not arg.startswith('--repro-')]
            tmpfile.write("'" + "' '".join(cmd_line) + "'\n")
            tmpfile.flush()
            chmod_plus_x(tmpfile.name)
            tarout.add(tmpfile.name, 'repro.sh')
def extract(self):
    """Extract this artifact's tarball under the artifact root, recording extracted relpaths.

    :raises ArtifactError: if the tarball is corrupt or unreadable.
    """
    try:
        with open_tar(self._tarfile, 'r', errorlevel=2) as tarin:
            # Note: We create all needed paths proactively, even though extractall() can do this for us.
            # This is because we may be called concurrently on multiple artifacts that share directories,
            # and there will be a race condition inside extractall(): task T1 A) sees that a directory
            # doesn't exist and B) tries to create it. But in the gap between A) and B) task T2 creates
            # the same directory, so T1 throws "File exists" in B).
            # This actually happened, and was very hard to debug.
            # Creating the paths here up front allows us to squelch that "File exists" error.
            paths = []
            dirs = set()
            for tarinfo in tarin.getmembers():
                paths.append(tarinfo.name)
                if tarinfo.isdir():
                    dirs.add(tarinfo.name)
                else:
                    dirs.add(os.path.dirname(tarinfo.name))
            for d in dirs:
                try:
                    os.makedirs(os.path.join(self._artifact_root, d))
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise
            tarin.extractall(self._artifact_root)
            self._relpaths.update(paths)
    except tarfile.ReadError as e:
        # FIX: exceptions have no `.message` attribute on Python 3 (it was already
        # deprecated on Python 2.6+), so `e.message` would raise AttributeError and
        # mask the real error. Stringify the exception instead.
        raise ArtifactError(str(e))
def test_cache_no_dereference_no_file(self):
    """With dereferencing off and no target file, the cached symlink must stay a (dangling) symlink."""
    self._prepare_task(deference=False, regular_file=False, regular_file_in_results_dir=False)
    all_vts = self.task.execute()
    self.assertGreater(len(all_vts), 0)
    for vt in all_vts:
        artifact_address = self._get_artifact_path(vt)
        with temporary_dir() as tmpdir:
            with open_tar(artifact_address, 'r') as archive:
                archive.extractall(path=tmpdir)
            link_path = self._find_first_file_in_path(tmpdir, SYMLINK_NAME)
            self.assertIsNotNone(
                link_path,
                "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
            self.assertTrue(
                os.path.islink(link_path),
                "{} in artifact {} should be a symlink but it is not.".format(
                    SYMLINK_NAME, artifact_address))
            # The destination of the symlink should be non-existent, hence IOError.
            with self.assertRaises(IOError):
                with open(link_path, 'r') as f:
                    f.read()
def _fingerprint_files_in_tar(file_list, tar_location):
    """Compute a hex SHA1 over the path and content of every `file_list` entry in the tar.

    :param file_list: object whose `.dependencies` yields entries with a `.path` attribute.
    :param tar_location: path of the tar archive to read.
    :returns: hex digest string.
    """
    digest = sha1()
    with open_tar(tar_location, mode='r') as tar:
        for entry in file_list.dependencies:
            # Path first, then content, so renamed-but-identical files fingerprint differently.
            digest.update(entry.path)
            digest.update(tar.extractfile(entry.path).read())
    return digest.hexdigest()
def extract(self):
    """Extract this artifact's tarball under the artifact root, recording extracted relpaths.

    :raises ArtifactError: if the tarball is corrupt or unreadable.
    """
    try:
        with open_tar(self._tarfile, 'r', errorlevel=2) as tarin:
            # Note: We create all needed paths proactively, even though extractall() can do this for us.
            # This is because we may be called concurrently on multiple artifacts that share directories,
            # and there will be a race condition inside extractall(): task T1 A) sees that a directory
            # doesn't exist and B) tries to create it. But in the gap between A) and B) task T2 creates
            # the same directory, so T1 throws "File exists" in B).
            # This actually happened, and was very hard to debug.
            # Creating the paths here up front allows us to squelch that "File exists" error.
            paths = []
            dirs = set()
            for tarinfo in tarin.getmembers():
                paths.append(tarinfo.name)
                if tarinfo.isdir():
                    dirs.add(tarinfo.name)
                else:
                    dirs.add(os.path.dirname(tarinfo.name))
            for d in dirs:
                try:
                    os.makedirs(os.path.join(self._artifact_root, d))
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise
            tarin.extractall(self._artifact_root)
            self._relpaths.update(paths)
    except tarfile.ReadError as e:
        # FIX: exceptions have no `.message` attribute on Python 3 (deprecated since
        # Python 2.6), so `e.message` would raise AttributeError here and mask the
        # real error. Stringify the exception instead.
        raise ArtifactError(str(e))
def create(self, basedir, outdir, name, prefix=None):
    """Archive `basedir` into `<outdir>/<name>.<extension>` and return the archive path.

    :param basedir: directory (bytes) whose contents are archived.
    :param outdir: directory to place the archive in.
    :param name: archive base name (bytes).
    :param prefix: optional arcname prefix; defaults to '.'.
    """
    # `name` and `basedir` arrive as bytes; decode them for path handling.
    tarpath = os.path.join(outdir, '%s.%s' % (name.decode('utf-8'), self.extension))
    with open_tar(tarpath, self.mode, dereference=True, errorlevel=1) as tar:
        tar.add(basedir.decode('utf-8'), arcname=prefix or '.')
    return tarpath
def create(self, basedir, outdir, name, prefix=None):
    """Archive `basedir` into `<outdir>/<name>.<extension>` and return the archive path.

    :param basedir: directory whose contents are archived (bytes or text).
    :param outdir: directory to place the archive in.
    :param name: archive base name (bytes or text).
    :param prefix: optional arcname prefix; defaults to '.'.
    """
    basedir = ensure_text(basedir)
    archive_name = '%s.%s' % (ensure_text(name), self.extension)
    tarpath = os.path.join(outdir, archive_name)
    with open_tar(tarpath, self.mode, dereference=True, errorlevel=1) as tar:
        tar.add(basedir, arcname=prefix or '.')
    return tarpath
def create(self, basedir, outdir, name, prefix=None):
    """Archive `basedir` into `<outdir>/<name>.<extension>` and return the archive path.

    :API: public

    :param basedir: directory whose contents are archived (bytes or text).
    :param outdir: directory to place the archive in.
    :param name: archive base name (bytes or text).
    :param prefix: optional arcname prefix; defaults to '.'.
    """
    basedir = ensure_text(basedir)
    archive_name = '{}.{}'.format(ensure_text(name), self.extension)
    tarpath = os.path.join(outdir, archive_name)
    with open_tar(tarpath, self.mode, dereference=True, errorlevel=1) as tar:
        tar.add(basedir, arcname=prefix or '.')
    return tarpath
def collect(self, paths):
    """Add each of `paths` (recursively, relative to the artifact root) to the artifact tarball.

    :param paths: iterable of absolute paths, or None for nothing to collect.
    """
    # In our tests, gzip is slightly less compressive than bzip2 on .class files,
    # but decompression times are much faster.
    mode = 'w:gz' if self._compress else 'w'
    with open_tar(self._tarfile, mode, dereference=True, errorlevel=2) as tarout:
        for path in paths or ():
            relpath = os.path.relpath(path, self._artifact_root)
            # tar.add() adds directories recursively.
            tarout.add(path, relpath)
            self._relpaths.add(relpath)
def create(self, basedir, outdir, name, prefix=None, dereference=True):
    """Archive `basedir` into `<outdir>/<name>.<extension>` and return the archive path.

    :API: public

    :param basedir: directory whose contents are archived (bytes or text).
    :param outdir: directory to place the archive in.
    :param name: archive base name (bytes or text).
    :param prefix: optional arcname prefix; defaults to '.'.
    :param dereference: if True, follow symlinks and archive their targets.
    """
    basedir = ensure_text(basedir)
    archive_name = '{}.{}'.format(ensure_text(name), self.extension)
    tarpath = os.path.join(outdir, archive_name)
    with open_tar(tarpath, self.mode, dereference=dereference, errorlevel=1) as tar:
        tar.add(basedir, arcname=prefix or '.')
    return tarpath
def collect(self, paths):
    """Pack each of `paths` into the artifact tarball, tracking root-relative paths.

    :param paths: iterable of absolute paths, or None for nothing to collect.
    """
    # In our tests, gzip is slightly less compressive than bzip2 on .class files,
    # but decompression times are much faster.
    mode = 'w:gz' if self._compress else 'w'
    with open_tar(self._tarfile, mode, dereference=True, errorlevel=2) as tarout:
        for path in paths or ():
            relpath = os.path.relpath(path, self._artifact_root)
            # tar.add() adds directories recursively.
            tarout.add(path, relpath)
            self._relpaths.add(relpath)
def collect(self, paths):
    """Pack each of `paths` into the gzipped artifact tarball, tracking root-relative paths.

    :param paths: iterable of absolute paths, or None for nothing to collect.
    """
    # In our tests, gzip is slightly less compressive than bzip2 on .class files,
    # but decompression times are much faster.
    with open_tar(self._tarfile,
                  'w:gz',
                  dereference=self._dereference,
                  errorlevel=2,
                  compresslevel=self._compression) as tarout:
        for path in paths or ():
            relpath = os.path.relpath(path, self._artifact_root)
            # tar.add() adds directories recursively.
            tarout.add(path, relpath)
            self._relpaths.add(relpath)
def collect(self, paths):
    """Pack each of `paths` into the gzipped artifact tarball, tracking root-relative paths.

    :param paths: iterable of absolute paths, or None for nothing to collect.
    """
    # In our tests, gzip is slightly less compressive than bzip2 on .class files,
    # but decompression times are much faster.
    with open_tar(self._tarfile,
                  'w:gz',
                  dereference=True,
                  errorlevel=2,
                  compresslevel=self._compression) as tarout:
        for path in paths or ():
            relpath = os.path.relpath(path, self._artifact_root)
            # tar.add() adds directories recursively.
            tarout.add(path, relpath)
            self._relpaths.add(relpath)
def _create_snapshot_archive(file_list, step_context):
    """Tar up `file_list`, fingerprint the tar, and move it to its content-addressed path.

    :param file_list: object whose `.dependencies` yields entries with a `.path` attribute.
    :param step_context: provides `project_tree.build_root` and `snapshot_archive_root`.
    :returns: a Snapshot wrapping the fingerprint of the archived files.
    """
    logger.debug('snapshotting files: {}'.format(file_list))
    # Constructs the snapshot tar in a temporary location, then fingerprints it
    # and moves it to the final path.
    with temporary_file_path(cleanup=False) as tmp_path:
        with open_tar(tmp_path, mode='w') as tar:
            for entry in file_list.dependencies:
                # TODO handle GitProjectTree. Using add() will fail with a
                # non-filesystem project tree.
                tar.add(os.path.join(step_context.project_tree.build_root, entry.path),
                        entry.path)
        snapshot = Snapshot(_fingerprint_files_in_tar(file_list, tmp_path))
        tar_location = _snapshot_path(snapshot, step_context.snapshot_archive_root)
        shutil.move(tmp_path, tar_location)
        return snapshot
def _create_snapshot_archive(file_list, step_context):
    """Build a snapshot tar of `file_list`, fingerprint it, and install it at its final path.

    :param file_list: object whose `.dependencies` yields entries with a `.path` attribute.
    :param step_context: provides `project_tree.build_root` and `snapshot_archive_root`.
    :returns: a Snapshot wrapping the fingerprint of the archived files.
    """
    logger.debug('snapshotting files: {}'.format(file_list))
    # Constructs the snapshot tar in a temporary location, then fingerprints it
    # and moves it to the final path.
    with temporary_file_path(cleanup=False) as tmp_path:
        with open_tar(tmp_path, mode='w') as tar:
            for entry in file_list.dependencies:
                # TODO handle GitProjectTree. Using add() will fail with a
                # non-filesystem project tree.
                tar.add(os.path.join(step_context.project_tree.build_root, entry.path),
                        entry.path)
        snapshot = Snapshot(_fingerprint_files_in_tar(file_list, tmp_path))
        tar_location = _snapshot_path(snapshot, step_context.snapshot_archive_root)
        shutil.move(tmp_path, tar_location)
        return snapshot
def _assert_dereferenced_symlink_in_cache(self, all_vts):
    """Assert that the cached tarball stores the symlink dereferenced (as a regular file)."""
    for vt in all_vts:
        artifact_address = self._get_artifact_path(vt)
        with temporary_dir() as tmpdir:
            with open_tar(artifact_address, 'r') as archive:
                archive.extractall(path=tmpdir)
            file_path = self._find_first_file_in_path(tmpdir, SYMLINK_NAME)
            self.assertIsNotNone(
                file_path,
                "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
            self.assertFalse(
                os.path.islink(file_path),
                "{} in artifact {} should not be a symlink but it is.".format(
                    SYMLINK_NAME, artifact_address))
            # Dereferenced content must match what the symlink pointed at.
            with open(file_path, 'r') as f:
                self.assertEqual(DUMMY_FILE_CONTENT, f.read())
def _assert_dereferenced_symlink_in_cache(self, all_vts):
    """Assert that the cached tarball stores the symlink dereferenced (as a regular file)."""
    for vt in all_vts:
        artifact_address = self._get_artifact_path(vt)
        with temporary_dir() as tmpdir:
            with open_tar(artifact_address, 'r') as archive:
                archive.extractall(path=tmpdir)
            file_path = self._find_first_file_in_path(tmpdir, SYMLINK_NAME)
            self.assertIsNotNone(
                file_path,
                "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
            self.assertFalse(
                os.path.islink(file_path),
                "{} in artifact {} should not be a symlink but it is.".format(
                    SYMLINK_NAME, artifact_address))
            # Dereferenced content must match what the symlink pointed at.
            with open(file_path, 'rb') as f:
                self.assertEqual(DUMMY_FILE_CONTENT, f.read())
def parse_tar(self, context, target, expected_image):
    """Assert `target` produced the expected docker image name, `docker save` it,
    and yield an open tar of the saved image (context-manager style).

    :param context: task context carrying the 'docker_image' products.
    :param target: the target whose image product is inspected.
    :param expected_image: the image name the product is expected to contain.
    """
    docker_image_products = context.products.get('docker_image')
    self.assertIsNotNone(docker_image_products)
    product_data = docker_image_products.get(target)
    self.assertEqual(1, len(product_data))
    # FIX: dict.items() returns a non-indexable view on Python 3, so
    # `product_data.items()[0]` raises TypeError. next(iter(...)) retrieves the
    # single entry on both Python 2 and 3.
    result_dir, result_keys = next(iter(product_data.items()))
    self.assertEqual(['docker_image_name'], result_keys)
    image_name_file = os.path.join(result_dir, result_keys[0])
    with open(image_name_file, 'r') as f:
        result_image_name = f.read()
    self.assertEqual(expected_image, result_image_name)
    with temporary_dir() as result_td:
        result_tar = os.path.join(result_td, 'contents.tar')
        subprocess.check_call(['docker', 'save', '--output=' + result_tar, result_image_name])
        with open_tar(result_tar) as tar:
            yield tar
def test_cache_no_dereference_file_inside_results_dir(self):
    """With dereferencing off, a symlink to a file inside the results dir stays a symlink."""
    self._prepare_task(deference=False, regular_file=True, regular_file_in_results_dir=True)
    all_vts = self.task.execute()
    self.assertGreater(len(all_vts), 0)
    for vt in all_vts:
        artifact_address = self._get_artifact_path(vt)
        with temporary_dir() as tmpdir:
            with open_tar(artifact_address, 'r') as archive:
                archive.extractall(path=tmpdir)
            link_path = self._find_first_file_in_path(tmpdir, SYMLINK_NAME)
            self.assertIsNotNone(
                link_path,
                "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
            self.assertTrue(
                os.path.islink(link_path),
                "{} in artifact {} should be a symlink but it is not.".format(
                    SYMLINK_NAME, artifact_address))
            # The link target was cached alongside it, so reading through it works.
            with open(link_path, 'rb') as f:
                self.assertEqual(DUMMY_FILE_CONTENT, f.read())
def test_cache_no_dereference_file_inside_results_dir(self):
    """With dereferencing off, a symlink to a file inside the results dir stays a symlink."""
    self._prepare_task(deference=False, regular_file=True, regular_file_in_results_dir=True)
    all_vts = self.task.execute()
    self.assertGreater(len(all_vts), 0)
    for vt in all_vts:
        artifact_address = self._get_artifact_path(vt)
        with temporary_dir() as tmpdir:
            with open_tar(artifact_address, 'r') as archive:
                archive.extractall(path=tmpdir)
            link_path = self._find_first_file_in_path(tmpdir, SYMLINK_NAME)
            self.assertIsNotNone(
                link_path,
                "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
            self.assertTrue(
                os.path.islink(link_path),
                "{} in artifact {} should be a symlink but it is not.".format(
                    SYMLINK_NAME, artifact_address))
            # The link target was cached alongside it, so reading through it works.
            with open(link_path, 'r') as f:
                self.assertEqual(DUMMY_FILE_CONTENT, f.read())
def test_repro(self):
    """End-to-end check of Repro.capture: ignored dirs are excluded, files and repro.sh included."""
    with temporary_dir() as tmpdir:
        fake_buildroot = os.path.join(tmpdir, 'buildroot')

        def add_file(path, content):
            # Lay down a file (creating parent dirs) inside the fake buildroot.
            fullpath = os.path.join(fake_buildroot, path)
            safe_mkdir_for(fullpath)
            with open(fullpath, 'w') as outfile:
                outfile.write(content)

        add_file('.git/foo', 'foo')
        add_file('dist/bar', 'bar')
        add_file('baz.txt', 'baz')
        add_file('qux/quux.txt', 'quux')

        repro_file = os.path.join(tmpdir, 'repro.tar.gz')
        repro = Repro(repro_file, fake_buildroot, ['.git', 'dist'])
        repro.capture(run_info_dict={'foo': 'bar', 'baz': 'qux'})

        extract_dir = os.path.join(tmpdir, 'extract')
        with open_tar(repro_file, 'r:gz') as tar:
            tar.extractall(extract_dir)

        def assert_not_exists(relpath):
            fullpath = os.path.join(extract_dir, relpath)
            self.assertFalse(os.path.exists(fullpath))

        def assert_file(relpath, expected_content=None):
            fullpath = os.path.join(extract_dir, relpath)
            self.assertTrue(os.path.isfile(fullpath))
            if expected_content:
                with open(fullpath, 'r') as infile:
                    content = infile.read()
                # FIX: assertEquals is a deprecated alias; use assertEqual.
                self.assertEqual(expected_content, content)

        assert_file('baz.txt', 'baz')
        assert_file('qux/quux.txt', 'quux')
        assert_file('repro.sh')
        assert_not_exists('.git')
        assert_not_exists('dist')
def test_cache_no_dereference_no_file(self):
    """With dereferencing off and no target file, the cached symlink must stay a (dangling) symlink."""
    self._prepare_task(deference=False, regular_file=False, regular_file_in_results_dir=False)
    all_vts = self.task.execute()
    self.assertGreater(len(all_vts), 0)
    for vt in all_vts:
        artifact_address = self._get_artifact_path(vt)
        with temporary_dir() as tmpdir:
            with open_tar(artifact_address, 'r') as archive:
                archive.extractall(path=tmpdir)
            link_path = self._find_first_file_in_path(tmpdir, SYMLINK_NAME)
            self.assertIsNotNone(
                link_path,
                "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
            self.assertTrue(
                os.path.islink(link_path),
                "{} in artifact {} should be a symlink but it is not.".format(
                    SYMLINK_NAME, artifact_address))
            # The destination of the symlink should be non-existent, hence IOError.
            with self.assertRaises(IOError):
                with open(link_path, 'rb') as f:
                    f.read()
def test_repro(self):
    """End-to-end check of Repro.capture: ignored dirs are excluded, files and repro.sh included."""
    with temporary_dir() as tmpdir:
        fake_buildroot = os.path.join(tmpdir, 'buildroot')

        def add_file(path, content):
            # Lay down a file (creating parent dirs) inside the fake buildroot.
            fullpath = os.path.join(fake_buildroot, path)
            safe_mkdir_for(fullpath)
            with open(fullpath, 'w') as outfile:
                outfile.write(content)

        add_file('.git/foo', 'foo')
        add_file('dist/bar', 'bar')
        add_file('baz.txt', 'baz')
        add_file('qux/quux.txt', 'quux')

        repro_file = os.path.join(tmpdir, 'repro.tar.gz')
        repro = Repro(repro_file, fake_buildroot, ['.git', 'dist'])
        repro.capture(run_info_dict={'foo': 'bar', 'baz': 'qux'})

        extract_dir = os.path.join(tmpdir, 'extract')
        with open_tar(repro_file, 'r:gz') as tar:
            tar.extractall(extract_dir)

        def assert_not_exists(relpath):
            fullpath = os.path.join(extract_dir, relpath)
            self.assertFalse(os.path.exists(fullpath))

        def assert_file(relpath, expected_content=None):
            fullpath = os.path.join(extract_dir, relpath)
            self.assertTrue(os.path.isfile(fullpath))
            if expected_content:
                with open(fullpath, 'r') as infile:
                    content = infile.read()
                # FIX: assertEquals is a deprecated alias; use assertEqual.
                self.assertEqual(expected_content, content)

        assert_file('baz.txt', 'baz')
        assert_file('qux/quux.txt', 'quux')
        assert_file('repro.sh')
        assert_not_exists('.git')
        assert_not_exists('dist')
def capture(self, run_info_dict):
    """Archive the buildroot (minus ignored entries) plus a generated `repro.sh` into self._path.

    :param run_info_dict: mapping of run-info keys to values, recorded as comments
        at the top of the generated repro.sh.
    """
    # Force the scm discovery logging messages to appear before ours, so the startup delay
    # is properly associated in the user's mind with us and not with scm.
    logger.info('Capturing repro information to {}'.format(self._path))
    with open_tar(self._path, 'w:gz', dereference=True, compresslevel=6) as tarout:
        for relpath in os.listdir(self._buildroot):
            if relpath in self._ignore:
                continue
            tarout.add(os.path.join(self._buildroot, relpath), relpath)
        # NOTE(review): str is written to the temp file below — presumably
        # temporary_file() yields a text-mode handle here; confirm.
        with temporary_file() as tmpfile:
            tmpfile.write('# Pants repro captured for the following build:\n')
            for k, v in sorted(run_info_dict.items()):
                tmpfile.write('# {}: {}\n'.format(k, v))
            cmd_line = list(sys.argv)
            # Use 'pants' instead of whatever the full executable path was on the user's system.
            cmd_line[0] = 'pants'
            # Remove any repro-related flags. The repro-ing user won't want to call those.
            cmd_line = [arg for arg in cmd_line if not arg.startswith('--repro-')]
            tmpfile.write("'" + "' '".join(cmd_line) + "'\n")
            tmpfile.flush()
            chmod_plus_x(tmpfile.name)
            tarout.add(tmpfile.name, 'repro.sh')
def _extract(cls, path, outdir, **kwargs):
    """Unpack the tarball at `path` into `outdir` (extra kwargs are accepted and ignored)."""
    with open_tar(path, errorlevel=1) as archive:
        archive.extractall(outdir)
def _extract(self, path_or_file, outdir, **kwargs):
    """Unpack the tarball given by path or file object into `outdir`.

    :param path_or_file: filesystem path or open file object of the archive.
    :param outdir: directory to extract into.
    :param kwargs: forwarded to open_tar.
    """
    with open_tar(path_or_file, errorlevel=1, **kwargs) as archive:
        archive.extractall(outdir)
def create(self, basedir, outdir, name, prefix=None):
    """Archive `basedir` into `<outdir>/<name>.<extension>` and return the archive path.

    :param basedir: directory (bytes) whose contents are archived.
    :param outdir: directory to place the archive in.
    :param name: archive base name (bytes).
    :param prefix: optional arcname prefix; defaults to '.'.
    """
    # `name` and `basedir` arrive as bytes; decode them for path handling.
    tarpath = os.path.join(outdir, '%s.%s' % (name.decode('utf-8'), self.extension))
    with open_tar(tarpath, self.mode, dereference=True, errorlevel=1) as tar:
        tar.add(basedir.decode('utf-8'), arcname=prefix or '.')
    return tarpath
def _extract_snapshot(step_context, snapshot, sandbox_dir, subject):
    """Extract `snapshot`'s archived files into `sandbox_dir` and log the extraction."""
    snapshot_tar = _snapshot_path(snapshot, step_context.snapshot_archive_root)
    with open_tar(snapshot_tar, errorlevel=1) as tar:
        tar.extractall(sandbox_dir)
    logger.debug('extracted {} snapshot to {}'.format(subject, sandbox_dir))
def extract(cls, path, outdir):
    """Unpack the tarball at `path` into `outdir`.

    :API: public
    """
    with open_tar(path, errorlevel=1) as archive:
        archive.extractall(outdir)
def _extract(self, path_or_file, outdir, **kwargs):
    """Unpack the tarball given by path or file object into `outdir`.

    :param path_or_file: filesystem path or open file object of the archive.
    :param outdir: directory to extract into.
    :param kwargs: forwarded to open_tar.
    """
    with open_tar(path_or_file, errorlevel=1, **kwargs) as archive:
        if PY2:
            # Python 2's tarfile wants a byte-string destination.
            outdir = outdir.encode('utf-8')
        archive.extractall(outdir)
def _extract(self, path_or_file, outdir, **kwargs):
    """Unpack the tarball given by path or file object into `outdir`.

    :param path_or_file: filesystem path or open file object of the archive.
    :param outdir: directory to extract into.
    :param kwargs: forwarded to open_tar.
    """
    with open_tar(path_or_file, errorlevel=1, **kwargs) as archive:
        if PY2:
            # Python 2's tarfile wants a byte-string destination.
            outdir = outdir.encode('utf-8')
        archive.extractall(outdir)
def assert_archive_files(self, expected_archive_files, snapshot, snapshot_archive_root):
    """Assert the snapshot tar contains exactly `expected_archive_files` (order-insensitive)."""
    snapshot_tar = _snapshot_path(snapshot, snapshot_archive_root)
    with open_tar(snapshot_tar, errorlevel=1) as tar:
        actual_names = sorted(tar.getnames())
    self.assertEqual(sorted(expected_archive_files), actual_names)
def _extract_snapshot(snapshot_archive_root, snapshot, sandbox_dir):
    """Extract `snapshot`'s archived files into `sandbox_dir`."""
    snapshot_tar = _snapshot_path(snapshot, snapshot_archive_root)
    with open_tar(snapshot_tar, errorlevel=1) as tar:
        tar.extractall(sandbox_dir)
def _extract(self, path_or_file, outdir, **kwargs):
    """Unpack the tarball given by path or file object into `outdir`.

    :param path_or_file: filesystem path or open file object of the archive.
    :param outdir: directory to extract into.
    :param kwargs: forwarded to open_tar.
    """
    with open_tar(path_or_file, errorlevel=1, **kwargs) as archive:
        archive.extractall(outdir)
def assert_archive_files(self, expected_archive_files, snapshot, snapshot_archive_root):
    """Assert the snapshot tar contains exactly `expected_archive_files` (order-insensitive)."""
    snapshot_tar = _snapshot_path(snapshot, snapshot_archive_root)
    with open_tar(snapshot_tar, errorlevel=1) as tar:
        actual_names = sorted(tar.getnames())
    self.assertEqual(sorted(expected_archive_files), actual_names)
def _extract_snapshot(snapshot_archive_root, snapshot, sandbox_dir, subject):
    """Extract `snapshot`'s archived files into `sandbox_dir` and log the extraction."""
    snapshot_tar = _snapshot_path(snapshot, snapshot_archive_root)
    with open_tar(snapshot_tar, errorlevel=1) as tar:
        tar.extractall(sandbox_dir)
    logger.debug('extracted {} snapshot to {}'.format(subject, sandbox_dir))
def extract(cls, path, outdir):
    """Unpack the tarball at `path` into `outdir`.

    :API: public
    """
    with open_tar(path, errorlevel=1) as archive:
        archive.extractall(outdir)
def _extract_snapshot(snapshot_archive_root, snapshot, sandbox_dir):
    """Extract `snapshot`'s archived files into `sandbox_dir`."""
    snapshot_tar = _snapshot_path(snapshot, snapshot_archive_root)
    with open_tar(snapshot_tar, errorlevel=1) as tar:
        tar.extractall(sandbox_dir)
def extract(cls, path, outdir):
    """Unpack the tarball at `path` into `outdir`."""
    with open_tar(path, errorlevel=1) as archive:
        archive.extractall(outdir)
def assert_archive_files(self, expected_archive_files, snapshot, step_context):
    """Assert the snapshot tar's members match `expected_archive_files` exactly (order-sensitive)."""
    snapshot_tar = _snapshot_path(snapshot, step_context.snapshot_archive_root)
    with open_tar(snapshot_tar, errorlevel=1) as tar:
        member_paths = [member.path for member in tar.getmembers()]
    self.assertEqual(expected_archive_files, member_paths)