# Imports assumed by the snippets below. These functions appear to come from
# more than one module; helpers such as fastlog (with DEBUG/INFO/WARNING/
# ERROR/UI levels) and globals like targetDirectory, readOptions, sizePrefixes
# and valid are defined elsewhere in the codebase.
import math
import os
import readline
import shutil
import stat
import subprocess
import time
from pathlib import Path
from random import shuffle
from subprocess import PIPE


def latencyBenchmark(filesList, loops=1):
    for loop in range(loops):
        for file in filesList:
            dd_cmd = "dd if={} of={}/{}".format(file, targetDirectory, file)
            dd_proc = subprocess.Popen(dd_cmd.split(), stdout=PIPE,
                                       stderr=subprocess.STDOUT)
            out = dd_proc.communicate()[0].decode('utf-8')
            fastlog(DEBUG, out)

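# A minimal usage sketch (assumes `targetDirectory` has been set and that the
# input files exist; names are illustrative only):
#
#   targetDirectory = "/tmp/latency_test"
#   latencyBenchmark(["frame-0001.gwf", "frame-0002.gwf"], loops=3)
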
def getGWDataFindQueries(query):
    urls = queryGWDataFind(query)
    fastlog(DEBUG, "Found {} files".format(len(urls)))
    queries = []
    for url in urls:
        # The GPS start time is the last all-digit token in the frame file URL.
        GPSTimeframeStart = [int(X) for X in url.split('-') if X.isdigit()][-1]
        queries.append("gwdata://{}-{}-{}-{}".format(query.observatory,
                                                     query.frametype,
                                                     GPSTimeframeStart,
                                                     GPSTimeframeStart + 1))
    return queries

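# Sketch of how per-frame gwdata:// queries might be generated (GWQuery string
# format taken from GWQuery.__init__ below; timestamps are illustrative):
#
#   q = GWQuery("V V1Online 1265280000 1265280010")
#   for sub_query in getGWDataFindQueries(q):
#       fastlog(DEBUG, sub_query)  # e.g. gwdata://V-V1Online-1265280000-1265280001
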
def convertSubfile(useCVMFS=False, subFilePath=None):
    sub_file = subFilePath
    if not sub_file:
        # Interactive fallback: ask for the path with tab completion enabled.
        readline.set_completer_delims(' \t\n;')
        readline.parse_and_bind("tab: complete")
        readline.set_completer(complete)
        fastlog(WARNING, 'Where is the original .sub file path? ')
        sub_file = input()
    return convertSub(sub_file,
                      worker_node_log_dir="./logs",
                      useCVMFS=useCVMFS,
                      ignore_exe_not_found=True)

def validateQuery(self):
    initGWDataFind()
    if self.observatory in GWDataFindObservatories:
        if self.frametype in GWDataFindTypes[self.observatory]:
            return True
        else:
            fastlog(ERROR, "Dataframe type is not valid. Aborting.")
            raise ValueError("Dataframe type is not valid.")
    else:
        fastlog(ERROR, "Observatory name is not valid. Aborting.")
        raise ValueError("Observatory name is not valid.")

def main(args):
    try:
        inputs = combineLists(args)
    except BaseException:
        fastlog(ERROR, "Errors while handling input files. Aborting.")
        raise
    if is_file(args.subfile):
        originalsub = open(args.subfile, 'r')
    else:
        fastlog(ERROR, "Original .sub file not found. Aborting.")
        raise ValueError("Invalid original submit file path.")
    generateSubmitFiles(originalsub, inputs)

def askQuestion(question, tabbing='', default=False):
    options = " [Y/n] " if default else " [N/y] "
    # `valid` is assumed to be a module-level mapping of accepted answers to
    # booleans; see the sketch below. An empty answer selects the default,
    # anything unrecognized re-prompts.
    while True:
        fastlog(WARNING, tabbing + question + options)
        answer = input()
        if answer in valid:
            return valid[answer]
        if answer == '':
            return default

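# A minimal sketch of the `valid` mapping assumed above (not shown in this
# section; adjust to the actual definition):
#
#   valid = {"y": True, "Y": True, "yes": True,
#            "n": False, "N": False, "no": False}
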
def queryGWDataFind(query: GWQuery):
    if shouldRenew():
        fastlog(INFO, "Creating new VOMS proxy for VO=virgo:virgo/virgo and 48h validity.")
        generateVomsProxy("virgo:virgo/virgo", 48)
    if not query.validateQuery():
        fastlog(ERROR, "GWDataFind query not valid. Aborting.")
        raise ValueError("GWDataFind query not valid. Aborting.")
    initGWDataFind()
    return GWDataFindConn.find_urls(query.observatory, query.frametype,
                                    query.GPSTSStart, query.GPSTSStop)

def parseFFL(path):
    fileListBuffer = []
    with open(path, 'r') as ffl:
        for line in ffl:
            sline = line.rstrip("\n")
            if is_file(sline):
                fileListBuffer.append(sline)
            else:
                fastlog(ERROR, "File {} not found. Skipping.".format(sline))
    return fileListBuffer

def __init__(self, observatory, frametype=None, GPSTSStart=None, GPSTSStop=None):
    if frametype and GPSTSStart and GPSTSStop:
        self.observatory = observatory
        self.frametype = frametype
        self.GPSTSStart = GPSTSStart
        self.GPSTSStop = GPSTSStop
    else:
        # Fall back to parsing a single space-separated query string.
        sstring = observatory.split()
        try:
            self.observatory = sstring[0]
            self.frametype = sstring[1]
            self.GPSTSStart = int(sstring[2])
            self.GPSTSStop = int(sstring[3])
        except (ValueError, IndexError):
            fastlog(ERROR, "Wrong input string for GWDataFind query generation: {}".format(observatory))

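# Both construction styles should be equivalent (values are illustrative):
#
#   q1 = GWQuery("V", "V1Online", 1265280000, 1265280010)
#   q2 = GWQuery("V V1Online 1265280000 1265280010")
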
def readBenchmark(filesList, loops=1, blocksize=512, pattern='random'):
    bandwidthMeasurements = []
    for file in filesList:
        if pattern == 'random':
            fastlog(INFO, "Performing random read benchmark on file {}.".format(file))
            blockscount = math.floor(os.path.getsize(file)/blocksize)
            offsets = list(range(0, blockscount * blocksize, blocksize))
            shuffle(offsets)
        elif pattern == 'sequential':
            fastlog(INFO, "Performing sequential read benchmark on file {}.".format(file))
            # A single read of the whole file.
            offsets = [0]
            blocksize = os.path.getsize(file)
        else:
            fastlog(ERROR, "Unsupported read benchmark pattern '{}'. Aborting!".format(pattern))
            return
        partialMeasurements = []
        for loop in range(loops):
            fastlog(DEBUG, "Starting loop {}".format(loop))
            global readOptions
            readfile = os.open(file, readOptions, 0o777)
            for i, offset in enumerate(offsets, 1):
                if i % 100000 == 0:
                    fastlog(DEBUG, "Offset {}/{}".format(i, len(offsets)))
                start = time.time()
                os.lseek(readfile, offset, os.SEEK_SET)
                buff = os.read(readfile, blocksize)
                elapsed = time.time() - start
                if not buff:
                    break
                partialMeasurements.append(blocksize/elapsed)
            os.close(readfile)
        bandwidthMeasurements.append(sum(partialMeasurements)/len(partialMeasurements))
        printBandwidth(bandwidthMeasurements[-1])
    return bandwidthMeasurements

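# Usage sketch (assumes the module-wide `readOptions` open flags, e.g.
# os.O_RDONLY, possibly combined with os.O_DIRECT on Linux):
#
#   readOptions = os.O_RDONLY
#   readBenchmark(["test_file"], loops=3, blocksize=4096, pattern='random')
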
def initGWDataFind(enforce=False):
    global GWDataFindInitialized
    if not GWDataFindInitialized or enforce:
        fastlog(INFO, "Initializing GWDataFind connection.")
        if shouldRenew():
            fastlog(INFO, "Creating new VOMS proxy for VO=virgo:virgo/virgo and 48h validity.")
            generateVomsProxy("virgo:virgo/virgo", 48)
        global GWDataFindConn
        GWDataFindConn = connect(GWDataFindServer)
        global GWDataFindObservatories
        GWDataFindObservatories = GWDataFindConn.find_observatories()
        global GWDataFindTypes
        GWDataFindTypes = {}
        for obs in GWDataFindObservatories:
            GWDataFindTypes[obs] = GWDataFindConn.find_types(obs)
        GWDataFindInitialized = True

def generateVOMSProxyIfNeeded(voms_string="--voms virgo:/virgo/virgo"):
    remaining_VOMS_time = getRemainingValidity()
    if remaining_VOMS_time != 0:
        fastlog(INFO, "Current VOMS proxy lasts for {} seconds.".format(remaining_VOMS_time))
        if askQuestion("Is it enough?"):
            fastlog(DEBUG, "Skipping VOMS proxy creation.")
            return
    fastlog(INFO, "\n---> Creating VOMS proxy for submission...\n")
    command = "voms-proxy-init " + voms_string
    subprocess.call(command.split())

def condorSubmitWrapper(argv, sub_file_path: Path):
    condor_sub_command = "condor_submit " + ' '.join(argv) + " " + sub_file_path.as_posix()
    fastlog(DEBUG, condor_sub_command)
    subprocess.call(condor_sub_command.split())

def benchmark(mode, useRamdisk=False, blocksize=1024, loops=1, file=None):
    if useRamdisk:
        fastlog(INFO, "Creating ramdisk... ")
        targetRamdisk = ramDisk(ramdiskPath)
        global targetDirectory
        targetDirectory = targetRamdisk.path
        fastlog(INFO, "Ramdisk created!")
    elif not is_directory(targetDirectory):
        os.mkdir(targetDirectory)
    testfiles = file if isinstance(file, list) else [file]
    if "readrand" in mode:
        fastlog(DEBUG, mode)
        readBenchmark(testfiles, pattern='random', blocksize=blocksize, loops=loops)
    if "readseq" in mode:
        fastlog(DEBUG, mode)
        readBenchmark(testfiles, pattern='sequential', blocksize=blocksize, loops=loops)
    if "iopsrand" in mode:
        fastlog(DEBUG, mode)
        IOPSBenchmark(testfiles, pattern='random', blocksize=blocksize, loops=loops)
    if "iopsseq" in mode:
        fastlog(DEBUG, mode)
        IOPSBenchmark(testfiles, pattern='sequential', blocksize=blocksize, loops=loops)
    if useRamdisk:
        fastlog(INFO, "Unmounting ramdisk... ")
        del targetRamdisk
        fastlog(INFO, "Ramdisk destroyed!")
    elif is_directory(targetDirectory):
        shutil.rmtree(targetDirectory)

def generateSubmitFiles(originalsub, inputs):
    outDir = args.output
    if not is_directory(outDir):
        createDir(outDir)
    else:
        fastlog(WARNING, "Provided output directory already there.")
        input("Press Enter to overwrite. Ctrl-C to abort.")
        shutil.rmtree(outDir)
        createDir(outDir)
    inputFound = False
    oldInputs = []
    for j, inpt in enumerate(inputs):
        originalsub.seek(0)
        fastlog(DEBUG, "\tInput list {}: {}".format(j, inpt))
        newSubDir = outDir + "/" + str(j) + "/"
        createDir(newSubDir)
        newSubPath = newSubDir + args.subfile.replace(".sub", ".{}.sub".format(j))
        newSub = open(newSubPath, 'w')
        fastlog(DEBUG, "\tReworking file {}".format(j))
        for line in originalsub:
            if line.startswith("transfer_input_files ="):
                inputFound = True
                oldInputs = line.replace("transfer_input_files =", '').rstrip("\n").replace(' ', '').split(',')
                # Symlink pre-existing relative inputs into the new submit dir.
                for oldInput in oldInputs:
                    if not is_abs_path(oldInput) and not oldInput.startswith("gwdata://") and is_file(oldInput):
                        os.symlink(oldInput, newSubDir + os.path.basename(oldInput))
                    else:
                        fastlog(WARNING, "{} is abs path or not found".format(oldInput))
                if oldInputs:
                    newSub.write("transfer_input_files = {}, {}\n".format(', '.join(inpt), ', '.join(oldInputs)))
                else:
                    newSub.write("transfer_input_files = {}\n".format(', '.join(inpt)))
            else:
                newSub.write(line)
        newSub.write("\n")
        newSub.close()
        fastlog(INFO, "\tSubmit file for {} and {} at {}".format(inpt, oldInputs, newSubPath))
    originalsub.close()
    if not inputFound:
        fastlog(ERROR, "No line starting with 'transfer_input_files' found in template .sub. Aborting.")
        raise BaseException("No valid placeholder for input file found.")
    return

def printBandwidth(measuredBytesPerSecond, rounding=2):
    log = math.log(measuredBytesPerSecond, 2**10)
    roundlog = math.floor(log)
    prefix = sizePrefixes[roundlog]
    bandwidth = measuredBytesPerSecond / math.pow(2**10, roundlog)
    fastlog(WARNING, "Measured bandwidth: {} {}B/s".format(round(bandwidth, rounding), prefix))

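# For example, assuming sizePrefixes maps 0 -> '', 1 -> 'Ki', 2 -> 'Mi', ...
# (the actual mapping is defined elsewhere in the module):
#
#   printBandwidth(5 * 2**20)   # -> "Measured bandwidth: 5.0 MiB/s"
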
def combineLists(args):
    filesLists = []
    if args.gwdatafind:
        fastlog(INFO, "Using GWDataFind to obtain input files.")
        queries = args.gwdatafind.split(':')
        for query in queries:
            filesLists.append(getGWDataFindQueries(GWQuery(query)))
    if args.ffl:
        fastlog(INFO, "Using FFL-defined input files.")
        ffls = args.ffl.split(':')
        for ffl in ffls:
            if not is_file(ffl):
                fastlog(ERROR, "FFL file not found at {}. Aborting.".format(ffl))
                return -1
            fastlog(INFO, "Parsing FFL file {} to obtain input files.".format(ffl))
            filesLists.append(parseFFL(ffl))
    fastlog(INFO, "Obtained {} lists.".format(len(filesLists)))
    for i, filesList in enumerate(filesLists):
        fastlog(INFO, "\tObtained {} files in list {}.".format(len(filesList), i + 1))
    fastlog(INFO, "Generating input lists and submit files from template .sub .")
    length = len(filesLists[0])
    for filesList in filesLists[1:]:
        if len(filesList) != length:
            fastlog(WARNING, "Input file lists are not of equal length. Input concatenation will crop all the lists to the length of the shortest one.")
    inputs = zip(*filesLists)
    return inputs

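# The cropping behaviour comes from zip() itself, which stops at the shortest
# iterable:
#
#   list(zip([1, 2, 3], ['a', 'b']))   # -> [(1, 'a'), (2, 'b')]
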
def createDir(path):
    try:
        os.stat(path)
    except OSError:
        fastlog(INFO, "Creating output dir: {}".format(path))
        os.makedirs(path)

def getGWDataFindURLs(query):
    urls = queryGWDataFind(query)
    fastlog(DEBUG, "Found {} files".format(len(urls)))
    return urls

parser.add_argument("-bs", "--blocksize", type=int, help='Block size to be used for the tests.') parser.add_argument("-l", "--loops", type=int, help='Number of tests to perform for each measure.') parser.add_argument("-f", "--file", type=str, help='Input file for the tests.') modes = ["readrand","readseq","iopsrand","iopsseq","all"] parser.add_argument("-m", "--mode", type=str, help='Test to perform. Available choices {}.'.format(modes)) args = parser.parse_args() if args.blocksize is not None: blocksize = args.blocksize else: blocksize = 1024 if args.loops is not None: loops = args.loops else: loops = 1 if args.file is not None: file = args.file else: file = "test_file" if args.mode is not None and args.mode in modes: if args.mode=="all": args.mode = ','.join(modes) fastlog(UI, "Performing benchmark with blocksize {} and {} tests of each kind.".format(blocksize, loops)) benchmark(args.mode, blocksize=blocksize, loops=loops, file=file) fastlog(UI, "Done! Bye bye") else: fastlog(ERROR, "Invalid mode. Choose one of {}. Aborting!".format(modes))
def IOPSBenchmark(filesList, loops=1, blocksize=512, pattern='random'):
    sectorsize = 4096
    IOPSMeasurements = []
    if pattern == 'random':
        fastlog(INFO, "Performing random IOPS measurement")
    elif pattern == 'sequential':
        fastlog(INFO, "Performing sequential IOPS measurement")
    else:
        fastlog(ERROR, "Unsupported IOPS measurement pattern '{}'. Aborting!".format(pattern))
        return
    for loop in range(loops):
        fastlog(DEBUG, "Starting loop {}".format(loop))
        for file in filesList:
            global readOptions
            fh = os.open(file, readOptions, 0o777)
            blockscount = math.floor(os.path.getsize(file)/blocksize)
            # Block-aligned offsets: shuffled for random access, in order for
            # sequential access.
            offsets = list(range(0, blockscount * blocksize, blocksize))
            if pattern == 'random':
                shuffle(offsets)
            count = 0
            start = time.time()
            for i, offset in enumerate(offsets, 1):
                if i % 100000 == 0:
                    fastlog(DEBUG, "Offset {}/{}".format(i, len(offsets)))
                os.lseek(fh, offset, os.SEEK_SET)
                blockdata = os.read(fh, blocksize)
                count += 1
            elapsed = time.time() - start
            os.close(fh)
            IOPSMeasurements.append(round(count/elapsed))
            fastlog(WARNING, "Measured IOPS: {}".format(IOPSMeasurements[-1]))
    return IOPSMeasurements

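# Usage sketch (same `readOptions` assumption as readBenchmark above):
#
#   IOPSBenchmark(["test_file"], loops=2, blocksize=512, pattern='sequential')
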
condor_sub_command = "condor_submit " + ' '.join( argv) + " " + sub_file_path.as_posix() fastlog(DEBUG, condor_sub_command) subprocess.call(condor_sub_command.split()) def printSeparator(): fastlog( UI, "/////////////////////////////////////////////////////////////////////////////////////////////////////////////////" ) if __name__ == "__main__": printSeparator() fastlog(UI, "\nWelcome to the Virgo HTCondor submitter wrapper!\n") fastlog(UI, "This tool will perform the following operations:") fastlog( UI, " 1. Evaluate wether a VOMS x509 proxy is needed for the submission or not and generate it." ) fastlog( UI, " 2. Generate a plain (without VOMS extensions) long lasting x509 proxy to be shipepd with the job." ) fastlog( UI, " 3. Rework your (awesome!) HTCondor submit file to encapsulate your executable inside a proxy rearming service." ) fastlog( UI,
def printSeparator(): fastlog( UI, "/////////////////////////////////////////////////////////////////////////////////////////////////////////////////" )
def convertSub(sub_file_path, worker_node_log_dir=None, main_executable_name=None,
               ignore_exe_not_found=False, useCVMFS=False):
    input_sub_file_path = Path(os.path.abspath(sub_file_path))
    output_sub_file_path = getConvertedSubPath(input_sub_file_path, useCVMFS)
    script_path, script_path_relative = getScriptPath(input_sub_file_path)
    input_sub = open(input_sub_file_path, "r")
    executable_string = ''
    input_files = ''
    output_files = ''
    arguments = ''
    input_files_found = False
    output_files_found = False
    arguments_found = False
    # First pass: collect the relevant commands from the original submit file.
    for line in input_sub:
        if line.startswith("executable"):
            executable_string = purgeLineHeader(line)
        elif line.startswith("transfer_input_files"):
            input_files_found = True
            input_files = purgeLineHeader(line)
        elif line.startswith("transfer_output_files"):
            output_files_found = True
            output_files = purgeLineHeader(line)
        elif line.startswith("arguments"):
            arguments_found = True
            arguments = purgeLineHeader(line)
    if executable_string == "run-with-proxy-satellite.py":
        fastlog(ERROR, "ERROR: this file has already been reworked!")
        input_sub.close()
        return
    if input_files_found:
        input_files = input_files + ','
    input_sub.seek(0)
    output_sub = open(output_sub_file_path, "w+")
    output_script = open(script_path, "w+")
    output_script.write('#! /bin/bash\n')
    if is_file(executable_string):
        if is_exe(executable_string):
            if not is_abs_path(executable_string):
                fastlog(DEBUG, "The file is executable but needs \"./\" prepended")
                executable_string = './' + executable_string
        else:
            fastlog(WARNING,
                    "Warning: {} is not (an) executable. Try running \"chmod +x {}\""
                    .format(executable_string, executable_string))
            return
    else:
        if not ignore_exe_not_found:
            fastlog(ERROR, "Error: the executable cannot be located. Aborting.")
            return
        else:
            if not is_abs_path(executable_string):
                fastlog(DEBUG, "The file is executable but needs \"./\" prepended")
                executable_string = './' + executable_string
    output_script.write(executable_string + ' ' + arguments)
    output_script.write("\n\n")
    output_script.close()
    # Make the generated wrapper script executable.
    st = os.stat(script_path)
    os.chmod(script_path, st.st_mode | stat.S_IEXEC)
    virgo_wrapper = "run-with-proxy-satellite.py"
    new_input_files = input_files + script_path_relative + ",./plainproxy.pem"
    if useCVMFS:
        virgo_wrapper = CVMFS_repo_path + "/" + virgo_wrapper
    else:
        new_input_files = new_input_files + ',' + ','.join(required_input_files)
    new_input_files = new_input_files + "\n"
    # Second pass: rewrite the submit file around the proxy-rearming wrapper.
    for wline in input_sub:
        if wline.startswith("executable"):
            output_sub.write("executable = {}\n".format(virgo_wrapper))
            if useCVMFS:
                output_sub.write("transfer_executable = false\n")
            if not input_files_found:
                output_sub.write("transfer_input_files = " + new_input_files)
            if not output_files_found and worker_node_log_dir:
                output_sub.write("transfer_output_files = " + worker_node_log_dir + "\n")
            if not arguments_found:
                output_sub.write("arguments = " + script_path_relative)
        elif wline.startswith("transfer_input_files"):
            output_sub.write("transfer_input_files = " + new_input_files)
        elif wline.startswith("arguments"):
            output_sub.write("arguments = " + script_path_relative)
        elif wline.startswith("transfer_output_files"):
            if worker_node_log_dir:
                output_sub.write("transfer_output_files = " + output_files + ',' + worker_node_log_dir + "\n")
            else:
                output_sub.write(wline)
        else:
            output_sub.write(wline)
    output_sub.write("\n")
    fastlog(DEBUG, "Reworked .sub file at: " + output_sub_file_path.as_posix())
    output_sub.close()
    input_sub.close()
    return output_sub_file_path

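# Usage sketch (paths are illustrative; getConvertedSubPath, getScriptPath and
# purgeLineHeader are helpers defined elsewhere in this module):
#
#   reworked = convertSub("my_job.sub", worker_node_log_dir="./logs")
#   if reworked:
#       condorSubmitWrapper([], reworked)
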