def make_tarball(project, _hash):
    '''
    Create a tar.bz2 (or .gz) ball of the experiments directory at a commit.

    The tree is exported with ``git_export``; every file reported by
    ``sys.list_directory_checksums`` is added to the archive, and a checksum
    listing (one ``<relative path> <checksum>`` line per file) is stored in
    the archive under the member name ``conf.tarball_checksum``.

    NOTE(review): ``sys`` here exposes ``list_directory_checksums``, which
    the standard-library ``sys`` does not have; presumably a project module
    imported under that name -- confirm against the file's imports.

    Parameters:
        project: passed through unchanged to ``git_export``.
        _hash: commit identifier; passed to ``git_export`` and used as the
            base name of the archive file.

    Returns:
        A tuple ``(experiment_details, tarfile_path)``: the result of
        ``get_experiment_details`` on the exported tree, and the path of the
        archive inside ``conf.experiment_archives_cache``.
    '''
    compression = conf.tarball_compression_method
    tarfile_path = os.path.join(conf.experiment_archives_cache,
                                _hash + '.tar.' + compression)

    with tarfile.open(tarfile_path, 'w:' + compression) as tarball:
        export_tmpdir = git_export(project, _hash)
        try:
            file_hash_list = []
            for full_path, relative_path, checksum in \
                    sys.list_directory_checksums(export_tmpdir):
                file_hash_list.append('%s %s' % (relative_path, checksum))
                tarball.add(full_path, arcname=relative_path)

            # NamedTemporaryFile opens in binary mode by default, so hand it
            # bytes explicitly. Byte strings pass through untouched; text is
            # encoded as UTF-8 instead of relying on an implicit conversion.
            checksum_listing = '\n'.join(file_hash_list) + '\n'
            if not isinstance(checksum_listing, bytes):
                checksum_listing = checksum_listing.encode('utf-8')

            tmpfile = tempfile.NamedTemporaryFile(delete=False)
            try:
                with tmpfile:
                    tmpfile.write(checksum_listing)
                # Temporary files are created owner-only; widen the mode so
                # the archived checksum member is readable by everyone.
                os.chmod(tmpfile.name, 0o644)
                tarball.add(tmpfile.name, conf.tarball_checksum)
            finally:
                # delete=False means nobody else removes it, so always do it
                # here -- including when writing or archiving fails.
                os.unlink(tmpfile.name)

            experiment_details = get_experiment_details(export_tmpdir)
        finally:
            # Zap the export dir even if building the archive failed.
            shutil.rmtree(export_tmpdir)
    return experiment_details, tarfile_path
def commit(self, date, msg='A commit'):
    '''
    Issue a git commit and record a checksum snapshot of the working tree.

    Runs ``git commit -m <msg>`` with the extra arguments produced by
    ``get_date_arg(date)``, then reads the newest log entry using
    ``archives_conf.git_log_format``. That entry is expected to be exactly
    two comma-separated fields (hash, timestamp); any other shape raises
    ``ValueError`` from the unpacking. The result of
    ``sys.list_directory_checksums(self.path)`` is stored in
    ``self.commit_dictionary`` under the commit hash.

    NOTE(review): the previous docstring said this returns the commit
    details, but no ``return`` statement is visible and the parsed timestamp
    is unused -- confirm whether a return value is expected by callers.
    '''
    commit_args = ['commit', '-m', msg]
    commit_args.extend(get_date_arg(date))
    self.git(commit_args)

    log_result = self.git('log', '-n', '1', archives_conf.git_log_format)
    log_line = log_result.stdout.strip()
    commit_hash, _timestamp = log_line.split(',')

    tree_snapshot = sys.list_directory_checksums(self.path)
    self.commit_dictionary[commit_hash] = tree_snapshot
def make_tarball(data_dir, boilerplates, label, compression_method="bz2",
                 checksum_filename="checksum.txt"):
    """
    Create a bz2 (or gz) tarball of the files in ``data_dir``.

    Every file reported by ``sys.list_directory_checksums`` is added to the
    archive. A checksum listing (one ``<checksum> <relative path>`` line per
    file) and each entry of ``boilerplates`` are then added as extra members.
    The archive is written to ``conf.data_archives_cache``.

    Parameters:
        data_dir: directory whose files are archived.
        boilerplates: iterable of ``(member_name, content)`` pairs to store
            as additional files in the archive. It is not modified.
        label: base name of the archive file (the extension is appended).
        compression_method: tarfile compression suffix, e.g. ``"bz2"`` or
            ``"gz"``; used both in the file extension and the open mode.
        checksum_filename: member name for the generated checksum listing.

    Returns:
        A tuple ``(tarball_filename, checksums, tarball_filesize)`` where
        ``checksums`` is a list of ``(checksum, relative_path)`` tuples and
        ``tarball_filesize`` is ``human_readable`` applied to the size in
        bytes of the finished archive.
    """
    tarball_filename = label + ".tar." + compression_method
    tarfile_path = os.path.join(conf.data_archives_cache, tarball_filename)

    with tarfile.open(tarfile_path, "w:" + compression_method) as tarball:
        checksums = []
        for full_path, relative_path, checksum in sys.list_directory_checksums(
            data_dir, algorithm=conf.default_hash_algorithm[0]
        ):
            checksums.append((checksum, relative_path))
            tarball.add(full_path, arcname=relative_path)

        checksum_txt = "\n".join("%s %s" % entry for entry in checksums) + "\n"

        # Work on a copy: appending to the caller's list would make the
        # checksum entry pile up if the same list is reused for another call.
        extra_members = list(boilerplates)
        extra_members.append((checksum_filename, checksum_txt))

        for member_name, content in extra_members:
            # NamedTemporaryFile opens in binary mode by default, so hand it
            # bytes explicitly. Byte strings pass through untouched; text is
            # encoded as UTF-8 instead of relying on an implicit conversion.
            if not isinstance(content, bytes):
                content = content.encode("utf-8")

            tmpfile = tempfile.NamedTemporaryFile(delete=False)
            try:
                with tmpfile:
                    tmpfile.write(content)
                # Temporary files are created owner-only; widen the mode so
                # the archived member is readable by everyone.
                os.chmod(tmpfile.name, 0o644)
                tarball.add(tmpfile.name, member_name)
            finally:
                # delete=False means nobody else removes it, so always do it
                # here -- including when writing or archiving fails.
                os.unlink(tmpfile.name)

    # Measured after the archive is closed so the size covers the fully
    # written (flushed and finalized) file.
    tarball_filesize = human_readable(os.path.getsize(tarfile_path))
    return tarball_filename, checksums, tarball_filesize