def helperCheckParallel(
    self, test_name, hpss_path, zstash_path=ZSTASH_PATH, verbose=False
):
    """
    Test `zstash check` in parallel.

    Runs `self.create`, `self.add_files` and `self.extract` to prepare the
    archive, then executes `zstash check --workers=3` from `self.test_dir`
    (adding `-v` when `verbose` is true). The combined stdout/stderr must
    mention every expected file and must not contain "ERROR".
    """
    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path)
    self.add_files(use_hpss, zstash_path)
    self.extract(use_hpss, zstash_path)
    print_starred("Checking the files in parallel.")
    self.assertWorkspace()

    # Expectations do not depend on the command run, so state them up front.
    must_appear = [
        "Checking file0.txt",
        "Checking file0_hard.txt",
        "Checking file0_soft.txt",
        "Checking file_empty.txt",
        "Checking dir/file1.txt",
        "Checking empty_dir",
        "Checking dir2/file2.txt",
        "Checking file3.txt",
        "Checking file4.txt",
        "Checking file5.txt",
    ]
    must_not_appear = ["ERROR"]

    os.chdir(self.test_dir)
    if verbose:
        v_option = " -v"
    else:
        v_option = ""
    cmd = "{}zstash check{} --hpss={} --workers=3".format(
        zstash_path, v_option, self.hpss_path
    )
    output, err = run_cmd(cmd)
    os.chdir(TOP_LEVEL)

    self.check_strings(cmd, output + err, must_appear, must_not_appear)
def helperCreateExcludeFile(self, test_name, hpss_path, zstash_path=ZSTASH_PATH):
    """
    Test `zstash --exclude`, excluding a file.

    After `self.writeExtraFiles()` has run, `zstash create` is invoked with
    `--exclude=not_exclude_dir/file_b.txt`. The output must report the two
    non-excluded files as archived, must not report the excluded file, and
    must contain the transfer message matching the value returned by
    `self.setupDirs`.
    """
    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)

    description_str = (
        "Adding files to HPSS" if use_hpss else "Adding files to local archive"
    )
    print_starred(description_str)
    self.assertWorkspace()
    self.writeExtraFiles()

    excluded_files = "not_exclude_dir/file_b.txt"
    cmd = "{}zstash create --exclude={} --hpss={} {}".format(
        zstash_path, excluded_files, self.hpss_path, self.test_dir
    )
    output, err = run_cmd(cmd)

    # The last expected line depends on whether HPSS is being used.
    transfer_message = (
        "Transferring file to HPSS" if use_hpss else "put: HPSS is unavailable"
    )
    expected_present = [
        "Archiving exclude_dir/file_a.txt",
        "Archiving not_exclude_dir/file_c.txt",
        transfer_message,
    ]
    expected_absent = ["ERROR", "Archiving not_exclude_dir/file_b.txt"]
    self.check_strings(cmd, output + err, expected_present, expected_absent)
def helperLs(self, test_name, hpss_path, cache=None, zstash_path=ZSTASH_PATH):
    """
    Test `zstash ls`.

    When `cache` is truthy it replaces `self.cache` and is passed to
    `zstash ls` through `--cache`. The command is run three times from
    `self.test_dir` (no option, `-v`, `-l`); each run must list
    "file0.txt" and must not print "ERROR".
    """
    self.hpss_path = hpss_path

    # Start from "no --cache flag" and override only when a cache was given.
    cache_option = ""
    if cache:
        self.cache = cache
        cache_option = " --cache={}".format(self.cache)

    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path, cache=self.cache)
    self.assertWorkspace()

    os.chdir(self.test_dir)
    for option in ("", "-v", "-l"):
        print_starred("Testing zstash ls {}".format(option))
        cmd = "{}zstash ls{} {} --hpss={}".format(
            zstash_path, cache_option, option, self.hpss_path
        )
        output, err = run_cmd(cmd)
        self.check_strings(cmd, output + err, ["file0.txt"], ["ERROR"])
    os.chdir(TOP_LEVEL)
def helperExtractKeep(self, test_name, hpss_path, zstash_path=ZSTASH_PATH):
    """
    Test `zstash extract` with `--keep`.

    The test directory is wiped and recreated, then `zstash extract --keep`
    is run (without `-v`). Afterwards `self.cache` must hold `index.db` and
    the five tar files, and the output must report every file as extracted.
    """
    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path)
    self.add_files(use_hpss, zstash_path)
    self.extract(use_hpss, zstash_path)

    print_starred(
        "Deleting the extracted files and doing it again without verbose option, "
        "while making sure the tars are kept."
    )
    self.assertWorkspace()

    # Start from an empty test directory.
    shutil.rmtree(self.test_dir)
    os.mkdir(self.test_dir)
    os.chdir(self.test_dir)
    if not use_hpss:
        # Without HPSS, bring the backed-up cache back into the fresh directory.
        backup_cache = "{}/{}/{}".format(TOP_LEVEL, self.backup_dir, self.cache)
        shutil.copytree(backup_cache, self.copy_dir)

    cmd = "{}zstash extract --hpss={} --keep".format(zstash_path, self.hpss_path)
    output, err = run_cmd(cmd)

    kept_files = [
        "index.db",
        "000000.tar",
        "000001.tar",
        "000002.tar",
        "000003.tar",
        "000004.tar",
    ]
    cache_matches = compare(os.listdir(self.cache), kept_files)
    if not cache_matches:
        self.stop(
            "The zstash directory does not contain expected files.\nIt has: {}".format(
                os.listdir(self.cache)
            )
        )
    os.chdir(TOP_LEVEL)

    expected_present = [
        "Extracting file0.txt",
        "Extracting file0_hard.txt",
        "Extracting file0_soft.txt",
        "Extracting file_empty.txt",
        "Extracting dir/file1.txt",
        "Extracting empty_dir",
        "Extracting dir2/file2.txt",
        "Extracting file3.txt",
        "Extracting file4.txt",
        "Extracting file5.txt",
    ]
    if use_hpss:
        expected_present += ["Transferring file from HPSS"]
    expected_absent = ["ERROR", "Not extracting"]
    self.check_strings(cmd, output + err, expected_present, expected_absent)
def helperCheckVerboseMismatch(self, test_name, hpss_path, zstash_path=ZSTASH_PATH):
    """
    Test `zstash check` with MD5 mismatch errors.

    A copy of `index.db` is saved, the `md5` column of every row with an
    even `id` is overwritten through the `sqlite3` command-line tool, and
    `zstash check -v` is expected to report mismatches for those rows only.
    The saved `index.db` is copied back afterwards.
    """
    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path)
    self.add_files(use_hpss, zstash_path)
    self.extract(use_hpss, zstash_path)
    print_starred("Causing MD5 mismatch errors and checking the files.")
    self.assertWorkspace()

    os.chdir(self.test_dir)
    index_db = "{}/index.db".format(self.cache)
    index_backup = "{}/index_old.db".format(self.cache)
    shutil.copy(index_db, index_backup)

    print("Messing up the MD5 of all of the files with an even id.")
    corrupt_md5_cmd = [
        "sqlite3",
        index_db,
        "UPDATE files SET md5 = 0 WHERE id % 2 = 0;",
    ]
    run_cmd(corrupt_md5_cmd)

    zstash_cmd = "{}zstash check -v --hpss={}".format(zstash_path, self.hpss_path)
    output, err = run_cmd(zstash_cmd)

    # Rows with an even `id` in the sqlite3 table: mismatches expected.
    mismatch_reported = [
        "md5 mismatch for: dir/file1.txt",
        "md5 mismatch for: file3.txt",
        "ERROR: 000001.tar",
        "ERROR: 000004.tar",
        "ERROR: 000002.tar",
    ]
    # Rows with an odd `id` in the sqlite3 table: must stay clean.
    mismatch_not_reported = [
        "ERROR: 000000.tar",
        "ERROR: 000003.tar",
        "ERROR: 000005.tar",
    ]
    self.check_strings(
        zstash_cmd, output + err, mismatch_reported, mismatch_not_reported
    )

    # Put the original index.db back.
    os.remove(index_db)
    shutil.copy(index_backup, index_db)
    os.chdir(TOP_LEVEL)
def helperUpdate(self, test_name, hpss_path, zstash_path=ZSTASH_PATH):
    """
    Test `zstash update`.

    Runs `zstash update -v` from `self.test_dir` straight after
    `self.create`; the output must contain "Nothing to update" and no
    "ERROR".
    """
    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path)

    banner = "Running update on the newly created directory, nothing should happen"
    print_starred(banner)
    self.assertWorkspace()

    cmd = "{}zstash update -v --hpss={}".format(zstash_path, self.hpss_path)
    os.chdir(self.test_dir)
    output, err = run_cmd(cmd)
    os.chdir(TOP_LEVEL)

    console_text = output + err
    self.check_strings(cmd, console_text, ["Nothing to update"], ["ERROR"])
def helperCheck(self, test_name, hpss_path, cache=None, zstash_path=ZSTASH_PATH):
    """
    Test `zstash check`.

    When `cache` is truthy it replaces `self.cache` and is forwarded via
    `--cache`. `zstash check` is run twice from `self.test_dir`, first
    without and then with `-v`; both runs must mention every expected file
    and must not print "ERROR".
    """
    self.hpss_path = hpss_path

    # Start from "no --cache flag" and override only when a cache was given.
    cache_option = ""
    if cache:
        self.cache = cache
        cache_option = " --cache={}".format(self.cache)

    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path, cache=self.cache)
    self.add_files(use_hpss, zstash_path, cache=self.cache)
    print_starred("Testing the checking functionality")
    self.assertWorkspace()

    expected_present = [
        "Checking file0.txt",
        "Checking file0_hard.txt",
        "Checking file0_soft.txt",
        "Checking file_empty.txt",
        "Checking dir/file1.txt",
        "Checking empty_dir",
        "Checking dir2/file2.txt",
        "Checking file3.txt",
        "Checking file4.txt",
        "Checking file5.txt",
    ]
    expected_absent = ["ERROR"]

    os.chdir(self.test_dir)
    # Same expectations for the plain run and the verbose run.
    for template in (
        "{}zstash check{} --hpss={}",
        "{}zstash check{} -v --hpss={}",
    ):
        cmd = template.format(zstash_path, cache_option, self.hpss_path)
        output, err = run_cmd(cmd)
        self.check_strings(cmd, output + err, expected_present, expected_absent)
    os.chdir(TOP_LEVEL)
def helperLsTarsUpdate(self, test_name, hpss_path, zstash_path=ZSTASH_PATH):
    """
    Test `zstash ls --tars` when the database was initially created
    without the tars table

    The archive is created with `no_tars_md5=True`. Before
    `self.add_files` runs, `zstash ls` must not mention tars at all and
    `zstash ls --tars` must report that the tars table does not exist.
    After `self.add_files`, `zstash ls --tars` (with and without `-l`) must
    list "000001.tar".
    """

    def ls_and_check(banner, template, present, absent):
        # Announce the step, run one `zstash ls` variant, verify its output.
        print_starred(banner)
        cmd = template.format(zstash_path, self.hpss_path)
        output, err = run_cmd(cmd)
        self.check_strings(cmd, output + err, present, absent)

    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)
    # Create without a tar table -- simulate a user updating an existing database
    self.create(use_hpss, zstash_path, no_tars_md5=True)
    self.assertWorkspace()

    os.chdir(self.test_dir)
    # tars should not be listed
    ls_and_check(
        "Testing zstash ls",
        "{}zstash ls --hpss={}",
        [],
        ["tars table does not exist", ".tar", "ERROR"],
    )
    # tars should not be listed
    ls_and_check(
        "Testing zstash ls --tars",
        "{}zstash ls --tars --hpss={}",
        ["tars table does not exist"],
        [".tar", "ERROR"],
    )
    os.chdir(TOP_LEVEL)

    # Updating should create the tars table
    self.add_files(use_hpss, zstash_path)

    os.chdir(self.test_dir)
    # tar should be listed now
    ls_and_check(
        "Testing zstash ls --tars",
        "{}zstash ls --tars --hpss={}",
        ["000001.tar"],
        ["ERROR"],
    )
    ls_and_check(
        "Testing zstash ls --tars -l",
        "{}zstash ls --tars -l --hpss={}",
        ["000001.tar"],
        ["ERROR"],
    )
    os.chdir(TOP_LEVEL)
def helperUpdateDryRun(self, test_name, hpss_path, zstash_path=ZSTASH_PATH):
    """
    Test `zstash update --dry-run`.

    Writes `dir2/file2.txt` and rewrites `dir/file1.txt` under
    `self.test_dir`, then runs `zstash update --dry-run`. The output must
    list those two files and must not mention the other listed names or
    the creation of a new tar archive.
    """
    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path)
    print_starred("Testing update with an actual change")
    self.assertWorkspace()

    # One file in a (possibly new) directory, one existing file modified.
    dir2_path = "{}/dir2".format(self.test_dir)
    if not os.path.exists(dir2_path):
        os.mkdir(dir2_path)
    write_file("{}/dir2/file2.txt".format(self.test_dir), "file2 stuff")
    write_file("{}/dir/file1.txt".format(self.test_dir), "file1 stuff with changes")

    expected_present = [
        "List of files to be updated",
        "dir/file1.txt",
        "dir2/file2.txt",
    ]
    # Make sure none of the old files or directories are moved.
    expected_absent = [
        "ERROR",
        "file0",
        "file_empty",
        "empty_dir",
        "INFO: Creating new tar archive",
    ]

    os.chdir(self.test_dir)
    cmd = "{}zstash update --dry-run --hpss={}".format(zstash_path, self.hpss_path)
    output, err = run_cmd(cmd)
    os.chdir(TOP_LEVEL)

    self.check_strings(cmd, output + err, expected_present, expected_absent)
def helperLsTars(self, test_name, hpss_path, zstash_path=ZSTASH_PATH):
    """
    Test `zstash ls --tars`

    Runs `zstash ls --tars` and `zstash ls --tars -l` from
    `self.test_dir`; both must list "000000.tar" and print no "ERROR".
    """
    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path)
    self.assertWorkspace()

    # (banner, command template) for each variant; expectations are shared.
    variants = (
        ("Testing zstash ls --tars", "{}zstash ls --tars --hpss={}"),
        ("Testing zstash ls --tars -l", "{}zstash ls --tars -l --hpss={}"),
    )

    os.chdir(self.test_dir)
    for banner, template in variants:
        print_starred(banner)
        cmd = template.format(zstash_path, self.hpss_path)
        output, err = run_cmd(cmd)
        self.check_strings(cmd, output + err, ["000000.tar"], ["ERROR"])
    os.chdir(TOP_LEVEL)
def helperCheckParallelVerboseMismatch(
    self, test_name, hpss_path, zstash_path=ZSTASH_PATH
):
    """
    Test `zstash check -v` in parallel with MD5 mismatch.

    Steps:
      1. Prepare the archive via `self.create`, `self.add_files` and
         `self.extract`.
      2. Save a copy of `index.db`, then overwrite the `md5` column of every
         row with an even `id` through the `sqlite3` command-line tool.
      3. Run `zstash check -v --workers=3` and verify that mismatches are
         reported for the expected files/tars and not for the others.
      4. Copy the saved `index.db` back.
      5. Print, for information only, a table comparing the checksums stored
         in `index.db` with `md5sum` results for the `.txt` files found
         under `self.backup_dir`.

    Parameters:
      test_name: forwarded to `self.setupDirs`.
      hpss_path: stored on `self.hpss_path` and passed to `--hpss`.
      zstash_path: prefix placed directly in front of `zstash` in commands.
    """
    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path)
    self.add_files(use_hpss, zstash_path)
    self.extract(use_hpss, zstash_path)
    print_starred("Causing MD5 mismatch errors and checking the files in parallel.")
    self.assertWorkspace()
    os.chdir(self.test_dir)
    shutil.copy(
        "{}/index.db".format(self.cache), "{}/index_old.db".format(self.cache)
    )
    print("Messing up the MD5 of all of the files with an even id.")
    sqlite_cmd = [
        "sqlite3",
        "{}/index.db".format(self.cache),
        "UPDATE files SET md5 = 0 WHERE id % 2 = 0;",
    ]
    run_cmd(sqlite_cmd)
    zstash_cmd = "{}zstash check -v --hpss={} --workers=3".format(
        zstash_path, self.hpss_path
    )
    output, err = run_cmd(zstash_cmd)
    # These files have an even `id` in the sqlite3 table.
    expected_present = [
        "md5 mismatch for: dir/file1.txt",
        "md5 mismatch for: file3.txt",
        "ERROR: 000001.tar",
        "ERROR: 000004.tar",
        "ERROR: 000002.tar",
    ]
    # These files have an odd `id` in the sqlite3 table.
    expected_absent = [
        "ERROR: 000000.tar",
        "ERROR: 000003.tar",
        "ERROR: 000005.tar",
    ]
    self.check_strings(zstash_cmd, output + err, expected_present, expected_absent)

    # Put the original index.db back.
    os.remove("{}/index.db".format(self.cache))
    shutil.copy(
        "{}/index_old.db".format(self.cache), "{}/index.db".format(self.cache)
    )
    os.chdir(TOP_LEVEL)

    print("Verifying the data from database with the actual files")
    # Checksums from HPSS
    sqlite_cmd = [
        "sqlite3",
        "{}/{}/index.db".format(self.test_dir, self.cache),
        "SELECT md5, name FROM files;",
    ]
    output_hpss, _ = run_cmd(sqlite_cmd)
    hpss_dict = {}
    for row in output_hpss.split("\n"):
        # sqlite3 separates the selected columns with "|": md5 first, then name.
        columns = row.split("|")
        if len(columns) >= 2:
            hpss_dict[columns[1]] = columns[0]

    # Checksums from local files
    find_cmd = "find {} ".format(self.backup_dir)
    find_cmd += (
        r"""-regex .*\.txt.* -exec md5sum {} + """  # Literal {}, not for formatting
    )
    output_local, _ = run_cmd(find_cmd)
    local_dict = {}
    for line in output_local.split("\n"):
        # md5sum separates the hash from the path with two characters, so a
        # single-space split would yield an empty second field. Splitting
        # once on any whitespace run gives exactly (hash, path) and keeps
        # paths that themselves contain spaces in one piece.
        fields = line.split(None, 1)
        if len(fields) >= 2:
            f_hash, path = fields
            # remove the backup_dir (the leading path component)
            f_name = "/".join(path.split("/")[1:])
            local_dict[f_name] = f_hash

    print("filename|HPSS hash|local file hash")
    for k in local_dict:
        print("{}|{}|{}".format(k, hpss_dict[k], local_dict[k]))
def helperExtractVerbose(self, test_name, hpss_path, zstash_path=ZSTASH_PATH):
    """
    Test `zstash extract -v`.

    First pass: run `zstash extract -v` again on an already extracted
    directory and expect "Not extracting ..." for the regular files.
    Second pass: wipe the test directory, run `zstash extract -v --keep`,
    and expect every file to be extracted and `self.copy_dir` to hold
    `index.db` plus the five tar files.
    """
    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path)
    self.add_files(use_hpss, zstash_path)
    self.extract(use_hpss, zstash_path)

    # ---- First pass: nothing should be re-extracted. ----
    print_starred("Testing that nothing happens when extracting a second time")
    self.assertWorkspace()
    os.chdir(self.test_dir)
    cmd = "{}zstash extract -v --hpss={}".format(zstash_path, self.hpss_path)
    output, err = run_cmd(cmd)
    # With HPSS, check that self.copy_dir only contains `index.db`.
    if use_hpss and not compare(os.listdir(self.copy_dir), ["index.db"]):
        self.stop(
            "The zstash directory should not have any tars.\nIt has: {}".format(
                os.listdir(self.copy_dir)
            )
        )
    os.chdir(TOP_LEVEL)

    skipped_files = [
        "Not extracting file0.txt",
        "Not extracting file0_hard.txt",
        "Not extracting file_empty.txt",
        "Not extracting dir/file1.txt",
        "Not extracting dir2/file2.txt",
        "Not extracting file3.txt",
        "Not extracting file4.txt",
        "Not extracting file5.txt",
    ]
    never_skipped = [
        "Not extracting file0_soft.txt",  # It's okay to extract the symlinks.
        "ERROR",
    ]
    if use_hpss:
        # It's okay to extract empty dirs.
        never_skipped += ["Not extracting empty_dir"]
    self.check_strings(cmd, output + err, skipped_files, never_skipped)

    # ---- Second pass: fresh directory, tars kept. ----
    print(
        "Deleting the extracted files and doing it again, "
        "while making sure the tars are kept."
    )
    shutil.rmtree(self.test_dir)
    os.mkdir(self.test_dir)
    os.chdir(self.test_dir)
    if not use_hpss:
        backup_cache = "{}/{}/{}".format(TOP_LEVEL, self.backup_dir, self.cache)
        shutil.copytree(backup_cache, self.copy_dir)
    cmd = "{}zstash extract -v --hpss={} --keep".format(zstash_path, self.hpss_path)
    output, err = run_cmd(cmd)

    # Check that self.copy_dir contains all expected files
    kept_files = [
        "index.db",
        "000000.tar",
        "000001.tar",
        "000002.tar",
        "000003.tar",
        "000004.tar",
    ]
    if not compare(os.listdir(self.copy_dir), kept_files):
        self.stop(
            "The zstash directory does not contain expected files.\nIt has: {}".format(
                os.listdir(self.copy_dir)
            )
        )
    os.chdir(TOP_LEVEL)

    extracted_files = [
        "Extracting file0.txt",
        "Extracting file0_hard.txt",
        "Extracting file0_soft.txt",
        "Extracting file_empty.txt",
        "Extracting dir/file1.txt",
        "Extracting empty_dir",
        "Extracting dir2/file2.txt",
        "Extracting file3.txt",
        "Extracting file4.txt",
        "Extracting file5.txt",
    ]
    if use_hpss:
        extracted_files += ["Transferring file from HPSS"]
    self.check_strings(
        cmd, output + err, extracted_files, ["ERROR", "Not extracting"]
    )
def helperExtractParallel(self, test_name, hpss_path, zstash_path=ZSTASH_PATH):
    """
    Test `zstash extract` in parallel.

    The test directory is wiped and `zstash extract --workers=3` is run
    twice, first with `-v` and then without. After each run the output must
    report every file as extracted, and the words containing ".tar" must
    appear in sorted order.
    """
    self.hpss_path = hpss_path
    use_hpss = self.setupDirs(test_name)
    self.create(use_hpss, zstash_path)
    self.add_files(use_hpss, zstash_path)
    self.extract(use_hpss, zstash_path)
    print_starred("Deleting the extracted files and doing it again in parallel.")
    self.assertWorkspace()

    expected_present = [
        "Extracting file0.txt",
        "Extracting file0_hard.txt",
        "Extracting file0_soft.txt",
        "Extracting file_empty.txt",
        "Extracting dir/file1.txt",
        "Extracting empty_dir",
        "Extracting dir2/file2.txt",
        "Extracting file3.txt",
        "Extracting file4.txt",
        "Extracting file5.txt",
    ]
    if use_hpss:
        expected_present += ["Transferring file from HPSS"]
    expected_absent = ["ERROR", "Not extracting"]

    def extract_and_verify(template):
        # Reset the test directory, run one extract command, check its output.
        shutil.rmtree(self.test_dir)
        os.mkdir(self.test_dir)
        os.chdir(self.test_dir)
        if not use_hpss:
            backup_cache = "{}/{}/{}".format(TOP_LEVEL, self.backup_dir, self.cache)
            shutil.copytree(backup_cache, self.copy_dir)
        cmd = template.format(zstash_path, self.hpss_path)
        output, err = run_cmd(cmd)
        os.chdir(TOP_LEVEL)
        console_output = output + err
        self.check_strings(cmd, console_output, expected_present, expected_absent)

        # Checking that the printing was done in order.
        cache_prefix = "{}/".format(self.cache)
        tar_order = [
            word.replace(cache_prefix, "")
            for word in console_output.replace("\n", " ").split(" ")
            if ".tar" in word
        ]
        in_order = sorted(tar_order)
        if tar_order != in_order:
            self.stop(
                "The tars were printed in this order: {}\nWhen it should have been in this order: {}".format(
                    tar_order, in_order
                )
            )

    extract_and_verify("{}zstash extract -v --hpss={} --workers=3")
    # Run again, without verbose option.
    extract_and_verify("{}zstash extract --hpss={} --workers=3")