def the_job(test_dir, resources):
    """Write the memory-polling job script and return its submit description.

    The generated script loops once per second for ``int(sys.argv[1])``
    iterations; on every iteration it runs one ``condor_status`` query per
    entry in ``resources`` against the ``.update.ad`` file in the job's
    scratch directory.

    Args:
        test_dir: Directory (path-like supporting ``/`` and ``as_posix()``)
            that receives the script and is used for the log/output paths.
        resources: Iterable of resource names.  It is iterated twice, so it
            must be re-iterable.

    Returns:
        A dict of submit commands, including one ``request_<resource>s``
        entry (value ``"2"``) per resource.
    """
    # One polling command per resource, already indented to sit inside the
    # script's ``while`` body once the whole text has been dedented.
    poll_commands = "".join(
        f"""
            os.system('condor_status -ads ${{_CONDOR_SCRATCH_DIR}}/.update.ad -af Assigned{resource}s {resource}sMemoryUsage')
"""
        for resource in resources
    )

    script_body = textwrap.dedent(
        """
        import os
        import sys
        import time

        elapsed = 0;
        while elapsed < int(sys.argv[1]):"""
        + poll_commands
        + """
            time.sleep(1)
            elapsed += 1
        """
    )
    job_script = format_script("#!/usr/bin/python3\n" + script_body)

    script_file = test_dir / "poll-memory.py"
    write_file(script_file, job_script)

    output_stem = "poll-memory.$(Cluster).$(Process)"
    job_spec = {
        "executable": script_file.as_posix(),
        "arguments": "17",
        "log": (test_dir / "events.log").as_posix(),
        "output": (test_dir / f"{output_stem}.out").as_posix(),
        "error": (test_dir / f"{output_stem}.err").as_posix(),
        "getenv": "true",
        "LeaveJobInQueue": "true",
    }
    job_spec.update({f"request_{resource}s": "2" for resource in resources})

    return job_spec
def condor(test_dir, slot_config):
    """Start a personal Condor pool once all of its slots are advertised.

    For every key of the module-level ``resources`` mapping, a discovery
    script and a monitor script are written into ``test_dir`` before the
    pool is started.  The per-instance values come from the module-level
    ``usages`` and ``peaks`` tables (both defined outside this block).

    Args:
        test_dir: Directory that holds the generated scripts and the pool's
            ``condor`` local directory.
        slot_config: Configuration mapping for the pool; must contain
            ``NUM_SLOTS``.

    Yields:
        The running ``Condor`` instance.

    Raises:
        AssertionError: If the expected number of slot ads has not shown up
            after 20 status queries.
    """
    for resource in resources.keys():
        usage_by_instance = {}
        for index, usage in enumerate(usages[resource]):
            usage_by_instance[f"{resource}{index}"] = usage
        write_file(
            test_dir / f"{resource}-discovery.py",
            format_script(discovery_script_for(resource, usage_by_instance)),
        )

        peak_by_instance = {}
        for index, peak in enumerate(peaks[resource]):
            peak_by_instance[f"{resource}{index}"] = peak
        write_file(
            test_dir / f"{resource}-monitor.py",
            both_monitor_script(resource, usage_by_instance, peak_by_instance),
        )

    pool_config = dict(slot_config)
    pool_config["TEST_DIR"] = test_dir.as_posix()

    with Condor(local_dir=test_dir / "condor", config=pool_config) as condor:
        # Ornithology will run condor_who to verify that all the daemons are
        # running, but occasionally, not all slots will have made it to the
        # collector, so poll the collector until they have.
        expected_slots = int(slot_config["NUM_SLOTS"])
        failed_checks = 0
        while True:
            slot_ads = condor.status(
                ad_type=htcondor.AdTypes.Startd, projection=["SlotID"]
            )
            if len(slot_ads) == expected_slots:
                break
            failed_checks += 1
            assert failed_checks < 20
            time.sleep(1)

        yield condor
def test_script_contents():
    """Return the text of a Python script that creates an empty output file.

    The script opens ``new_output_file`` for writing, writes an empty string
    to it, and exits with status 0.
    """
    script_source = """
        #!/usr/bin/env python3

        with open("new_output_file", "w") as f:
            f.write("")
        exit(0)
        """
    return format_script(script_source)
def job_shell_file(test_dir, job_python_file, plugin_log_file):
    """Write the ``debug.sh`` wrapper around the Python plugin and return its path.

    The wrapper replaces itself (``exec``) with ``job_python_file``, forwards
    all of its own arguments, and redirects both stdout and stderr of the
    plugin into ``plugin_log_file``.

    Args:
        test_dir: Directory in which ``debug.sh`` is created.
        job_python_file: Path of the Python plugin to execute.
        plugin_log_file: Path of the file receiving the plugin's output.

    Returns:
        The path of the written wrapper script.
    """
    wrapper_path = test_dir / "debug.sh"
    # "$@" must be quoted: an unquoted $@ is re-split on whitespace and
    # glob-expanded, which would corrupt arguments such as -infile/-outfile
    # paths that contain spaces.
    contents = format_script(
        f"""
        #!/bin/bash
        exec {job_python_file} "$@" &> {plugin_log_file}
        """
    )
    write_file(wrapper_path, contents)
    return wrapper_path
def discovery_script(resources):
    """Return a Python script that prints the ``DetectedXXX`` attribute.

    Args:
        resources: Mapping whose keys are the resource names to advertise.

    Returns:
        Script text whose single ``print`` emits ``DetectedXXX="..."`` with
        the keys joined by ``", "``.
    """
    detected = ", ".join(resources.keys())
    return format_script(
        f"""
        #!/usr/bin/python3
        print('DetectedXXX="{detected}"')
        """
    )
def path_to_ordering_script(test_dir):
    """Write ``ordering.sh`` into ``test_dir`` and return its path.

    The script prints the contents of ``a_file`` (relative to its working
    directory) and exits with status 0.
    """
    script_path = test_dir / "ordering.sh"
    script_source = """
    #!/bin/bash

    cat a_file
    exit 0
    """
    write_file(script_path, format_script(script_source))
    return script_path
def peak_job_script(resource):
    """Return the text of a job script that polls one resource's usage.

    The script loops once per second for ``int(sys.argv[1])`` iterations and
    on each iteration runs ``condor_status`` against the ``.update.ad`` file
    in the job's scratch directory, printing ``Assigned<resource>s`` and
    ``<resource>sMemoryUsage``.

    Args:
        resource: Resource name spliced into the queried attribute names.
    """
    polling_loop = textwrap.dedent(
        f"""
        import os
        import sys
        import time

        elapsed = 0;
        while elapsed < int(sys.argv[1]):
            os.system('condor_status -ads ${{_CONDOR_SCRATCH_DIR}}/.update.ad -af Assigned{resource}s {resource}sMemoryUsage')
            time.sleep(1)
            elapsed += 1
        """
    )
    return format_script("#!/usr/bin/python3\n" + polling_loop)
def condor(test_dir, slot_config):
    """Write the per-resource helper scripts, then start a personal Condor pool.

    For every key of the module-level ``resources`` mapping, a discovery
    script and a monitor script are written into ``test_dir``.  The
    per-instance values come from the module-level ``usages`` and ``peaks``
    tables (both defined outside this block).

    Args:
        test_dir: Directory that holds the generated scripts and the pool's
            ``condor`` local directory.
        slot_config: Configuration mapping for the pool.

    Yields:
        The running ``Condor`` instance.
    """
    for resource in resources.keys():
        usage_by_instance = {}
        for index, usage in enumerate(usages[resource]):
            usage_by_instance[f"{resource}{index}"] = usage
        write_file(
            test_dir / f"{resource}-discovery.py",
            format_script(discovery_script_for(resource, usage_by_instance)),
        )

        peak_by_instance = {}
        for index, peak in enumerate(peaks[resource]):
            peak_by_instance[f"{resource}{index}"] = peak
        write_file(
            test_dir / f"{resource}-monitor.py",
            both_monitor_script(resource, usage_by_instance, peak_by_instance),
        )

    pool_config = dict(slot_config)
    pool_config["TEST_DIR"] = test_dir.as_posix()

    with Condor(local_dir=test_dir / "condor", config=pool_config) as condor:
        yield condor
def monitor_script(resources):
    """Return a Python monitor script reporting one record per resource.

    For each ``name -> increment`` pair the generated script prints a
    ``SlotMergeConstraint`` line selecting ``name``, an ``UptimeXXXSeconds``
    line carrying ``increment``, and a ``- name`` terminator line.

    Args:
        resources: Mapping of resource instance name to the value reported
            as ``UptimeXXXSeconds``.
    """
    report_chunks = []
    for name, increment in resources.items():
        report_chunks.append(
            textwrap.dedent(
                f"""
                print('SlotMergeConstraint = StringListMember( "{name}", AssignedXXX )')
                print('UptimeXXXSeconds = {increment}')
                print('- {name}')
                """
            )
        )
    return format_script("#!/usr/bin/python3\n" + "".join(report_chunks))
def path_to_the_job_script(test_dir):
    """Write the step-counting job script (``counting.py``) and return its path.

    The script performs 24 steps of 3 seconds each, resuming from the step
    count stored in ``saved-state`` when that file is readable.  After every
    fifth completed step it writes the count to ``saved-state`` and exits
    with status 17 (presumably the checkpoint exit code the submitting test
    configures; confirm against the caller).  It exits 1 if the state file
    cannot be written and 0 once all steps are done.
    """
    script_path = test_dir / "counting.py"
    script_source = """
    #!/usr/bin/python3

    import sys
    import time
    import getopt

    total_steps = 24
    num_completed_steps = 0
    try:
        with open("saved-state", "r") as saved_state:
            num_completed_steps = int(saved_state.readline().strip())
    except IOError:
        pass

    while num_completed_steps < total_steps:
        print(f"Starting step {num_completed_steps}.")

        time.sleep(3)

        num_completed_steps += 1
        if num_completed_steps % 5 == 0:
            print(f"Checkpointing after {num_completed_steps}.")
            try:
                with open("saved-state", "w") as saved_state:
                    saved_state.write(f"{num_completed_steps}")
                sys.exit(17)
            except IOError:
                print("Failed to write checkpoint.", file=sys.stderr);
                sys.exit(1)

    print(f"Completed all {total_steps} steps.")
    sys.exit(0)
    """
    write_file(script_path, format_script(script_source))
    return script_path
def path_to_prp_script(default_condor, test_dir):
    """Write the multi-stage ``prp.sh`` job script and return its path.

    The script decides what to do from the last line of
    ``<test_dir>/prp-test-file``:

    * empty: record "step one", create ``prp/data/subdir``, exit 85;
    * "step one": record "step two", vacate the job given as ``$1``, sleep,
      and exit 1 if it is still running afterwards;
    * "step two": record "step three" and exit 0;
    * anything else: record "step never-never" and exit 1.

    Args:
        default_condor: Object whose ``config_file`` is exported to the
            script as ``CONDOR_CONFIG``.
        test_dir: Directory holding the progress file and the script itself.
    """
    # Values baked into the script when it is generated.
    condor_config = default_condor.config_file
    inherited_path = os.environ["PATH"]
    progress_file = f"{test_dir}/prp-test-file"

    script_source = f"""
    #!/bin/bash

    export CONDOR_CONFIG={condor_config}
    export PATH=$PATH:{inherited_path}

    DATA=`tail -n 1 {progress_file}`
    echo "Starting up..." >> {progress_file}
    if [[ $DATA == "" ]]; then
        echo "step one" >> {progress_file}
        echo "step one" >> prp/data/data_file
        mkdir prp/data/subdir
        echo "step one" >> prp/data/subdir/other_data_file
        exit 85
    fi

    if [[ $DATA == "step one" ]]; then
        echo "step two" >> {progress_file}
        echo "step two" >> prp/data/data_file
        condor_vacate_job $1
        # Don't exit before we've been vacated.
        sleep 60
        # We did not succeed.
        exit 1
    fi

    if [[ $DATA == "step two" ]]; then
        echo "step three" >> {progress_file}
        echo "step three" >> prp/data/data_file
        exit 0
    fi

    echo "step never-never" >> {progress_file}
    echo "step never-never" >> prp/data/data_file
    exit 1
    """

    script_path = test_dir / "prp.sh"
    write_file(script_path, format_script(script_source))
    return script_path
def path_to_directory_script(default_condor, test_dir):
    """Write the two-pass ``directory.sh`` job script and return its path.

    On every run the script appends ``data/data_file`` to
    ``<test_dir>/directory-test-file``.  If the data file holds
    "first job modification" it overwrites it with "second job modification"
    and exits 0; otherwise it writes "first job modification", vacates the
    job given as ``$1``, and sleeps so it does not exit before being vacated.

    Args:
        default_condor: Object whose ``config_file`` is exported to the
            script as ``CONDOR_CONFIG``.
        test_dir: Directory holding the record file and the script itself.
    """
    # Values baked into the script when it is generated.
    condor_config = default_condor.config_file
    inherited_path = os.environ["PATH"]
    record_file = f"{test_dir}/directory-test-file"

    script_source = f"""
    #!/bin/bash

    export CONDOR_CONFIG={condor_config}
    export PATH=$PATH:{inherited_path}

    cat data/data_file >> {record_file}
    DATA=`cat data/data_file`
    if [[ $DATA == "first job modification" ]]; then
        echo "second job modification" > data/data_file
        exit 0
    fi

    echo "first job modification" > data/data_file
    condor_vacate_job $1
    # Don't exit before we've been vacated.
    sleep 60
    exit 0
    """

    script_path = test_dir / "directory.sh"
    write_file(script_path, format_script(script_source))
    return script_path
def path_to_job_two_script(test_dir):
    """Write the napping job script (``counting.py``) and return its path.

    The script takes one nap per invocation from the schedule
    ``[5, 10, 15]`` seconds, resuming at the index stored in ``saved-state``
    when that file is readable.  After the final nap it exits 0; otherwise
    it stores the next nap index and exits 17 (presumably the checkpoint
    exit code the submitting test configures; confirm against the caller),
    or exits 1 if the state file cannot be written.
    """
    script_path = test_dir / "counting.py"
    script_source = """
    #!/usr/bin/python3

    import sys
    import time

    nap = 0
    nap_lengths = [5, 10, 15]
    try:
        with open("saved-state", "r") as saved_state:
            nap = int(saved_state.readline().strip())
            print(f"Restarting naps from #{nap}")
    except IOError:
        pass

    print(f"Nap #{nap} will be {nap_lengths[nap]} seconds long.")
    time.sleep(nap_lengths[nap])
    nap += 1

    if nap >= len(nap_lengths):
        print(f"Completed all naps.")
        sys.exit(0)

    try:
        with open("saved-state", "w") as saved_state:
            saved_state.write(f"{nap}")
        sys.exit(17)
    except IOError:
        print("Failed to write checkpoint.", file=sys.stderr);
        sys.exit(1)
    """
    write_file(script_path, format_script(script_source))
    return script_path
def job_python_file(test_dir):
    """Write the ``debug.py`` file-transfer plugin and return its path.

    The generated plugin performs no real transfers.  Invoked with
    ``-classad`` it prints its capabilities (method ``debug``).  Invoked with
    ``-infile``/``-outfile`` (and optionally ``-upload``) it reads the
    transfer ads from the input file, prints a ``DEBUG:`` line for each one,
    and writes a result ad per transfer to the output file.

    The result ad produced for a download reports ``TransferType`` as
    ``'download'``; it previously said ``'upload'``, a copy-paste slip from
    ``upload_file``.

    Args:
        test_dir: Directory in which ``debug.py`` is created.

    Returns:
        The path of the written plugin script.
    """
    plugin_path = test_dir / "debug.py"
    # NOTE: every line of the embedded script, including the body of the
    # help message, is indented at least as far as the shebang so that
    # dedenting the text removes exactly that common margin.
    contents = format_script(
        """
        #!/usr/bin/python3

        import classad
        import json
        import os
        import posixpath
        import shutil
        import socket
        import sys
        import time

        from urllib.parse import urlparse

        DEFAULT_TIMEOUT = 30
        PLUGIN_VERSION = '1.0.0'

        EXIT_SUCCESS = 0
        EXIT_FAILURE = 1
        EXIT_AUTHENTICATION_REFRESH = 2


        def print_help(stream = sys.stderr):
            help_msg = '''Usage: {0} -infile <input-filename> -outfile <output-filename>
               {0} -classad

        Options:
          -classad                    Print a ClassAd containing the capablities of this
                                      file transfer plugin.
          -infile <input-filename>    Input ClassAd file
          -outfile <output-filename>  Output ClassAd file
          -upload                     Indicates this transfer is an upload (default is
                                      download)
        '''
            stream.write(help_msg.format(sys.argv[0]))

        def print_capabilities():
            capabilities = {
                 'MultipleFileSupport': True,
                 'PluginType': 'FileTransfer',
                 'SupportedMethods': 'debug',
                 'Version': PLUGIN_VERSION,
            }
            sys.stdout.write(classad.ClassAd(capabilities).printOld())

        def parse_args():

            # The only argument lists that are acceptable are
            # <this> -classad
            # <this> -infile <input-filename> -outfile <output-filename>
            # <this> -outfile <output-filename> -infile <input-filename>
            if not len(sys.argv) in [2, 5, 6]:
                print_help()
                sys.exit(EXIT_FAILURE)

            # If -classad, print the capabilities of the plugin and exit early
            if (len(sys.argv) == 2) and (sys.argv[1] == '-classad'):
                print_capabilities()
                sys.exit(EXIT_SUCCESS)

            # If -upload, set is_upload to True and remove it from the args list
            is_upload = False
            if '-upload' in sys.argv[1:]:
                is_upload = True
                sys.argv.remove('-upload')

            # -infile and -outfile must be in the first and third position
            if not (
                    ('-infile' in sys.argv[1:]) and
                    ('-outfile' in sys.argv[1:]) and
                    (sys.argv[1] in ['-infile', '-outfile']) and
                    (sys.argv[3] in ['-infile', '-outfile']) and
                    (len(sys.argv) == 5)):
                print_help()
                sys.exit(1)
            infile = None
            outfile = None
            try:
                for i, arg in enumerate(sys.argv):
                    if i == 0:
                        continue
                    elif arg == '-infile':
                        infile = sys.argv[i+1]
                    elif arg == '-outfile':
                        outfile = sys.argv[i+1]
            except IndexError:
                print_help()
                sys.exit(EXIT_FAILURE)

            return {'infile': infile, 'outfile': outfile, 'upload': is_upload}

        def format_error(error):
            return '{0}: {1}'.format(type(error).__name__, str(error))

        def get_error_dict(error, url = ''):
            error_string = format_error(error)
            error_dict = {
                'TransferSuccess': False,
                'TransferError': error_string,
                'TransferUrl': url,
            }

            return error_dict

        class DebugPlugin:

            # Extract whatever information we want from the url provided.
            # In this example, convert the example://path/to/file url to a
            # path in the file system (ie. /path/to/file)
            def parse_url(self, url):
                url_path = url[(url.find("://") + 3):]
                return url_path

            def download_file(self, url, local_file_path):

                start_time = time.time()

                # Download transfer logic goes here
                print(f"DEBUG: download {url} -> {local_file_path}")
                file_size = 0

                end_time = time.time()

                # Get transfer statistics
                transfer_stats = {
                    'TransferSuccess': True,
                    'TransferProtocol': 'example',
                    'TransferType': 'download',
                    'TransferFileName': local_file_path,
                    'TransferFileBytes': file_size,
                    'TransferTotalBytes': file_size,
                    'TransferStartTime': int(start_time),
                    'TransferEndTime': int(end_time),
                    'ConnectionTimeSeconds': end_time - start_time,
                    'TransferUrl': url,
                }

                return transfer_stats

            def upload_file(self, url, local_file_path):

                start_time = time.time()

                # Upload transfer logic goes here
                print(f"DEBUG: upload {local_file_path} --> {url}")
                file_size = 0

                end_time = time.time()

                # Get transfer statistics
                transfer_stats = {
                    'TransferSuccess': True,
                    'TransferProtocol': 'example',
                    'TransferType': 'upload',
                    'TransferFileName': local_file_path,
                    'TransferFileBytes': file_size,
                    'TransferTotalBytes': file_size,
                    'TransferStartTime': int(start_time),
                    'TransferEndTime': int(end_time),
                    'ConnectionTimeSeconds': end_time - start_time,
                    'TransferUrl': url,
                }

                return transfer_stats


        if __name__ == '__main__':

            # Start by parsing input arguments
            try:
                args = parse_args()
            except Exception:
                sys.exit(EXIT_FAILURE)

            debug_plugin = DebugPlugin()

            # Parse in the classads stored in the input file.
            # Each ad represents a single file to be transferred.
            try:
                infile_ads = classad.parseAds(open(args['infile'], 'r'))
            except Exception as err:
                try:
                    with open(args['outfile'], 'w') as outfile:
                        outfile_dict = get_error_dict(err)
                        outfile.write(str(classad.ClassAd(outfile_dict)))
                except Exception:
                    pass
                sys.exit(EXIT_FAILURE)

            # Now iterate over the list of classads and perform the transfers.
            try:
                with open(args['outfile'], 'w') as outfile:
                    for ad in infile_ads:
                        try:
                            if not args['upload']:
                                outfile_dict = debug_plugin.download_file(ad['Url'], ad['LocalFileName'])
                            else:
                                outfile_dict = debug_plugin.upload_file(ad['Url'], ad['LocalFileName'])

                            outfile.write(str(classad.ClassAd(outfile_dict)))

                        except Exception as err:
                            try:
                                outfile_dict = get_error_dict(err, url = ad['Url'])
                                outfile.write(str(classad.ClassAd(outfile_dict)))
                            except Exception:
                                pass
                            sys.exit(EXIT_FAILURE)

            except Exception:
                sys.exit(EXIT_FAILURE)
        """
    )
    write_file(plugin_path, contents)
    return plugin_path
def monitor_script(the_config):
    """Return the formatted monitor script stored under ``the_config["monitor"]``.

    Args:
        the_config: Mapping that holds the raw monitor script text under the
            ``"monitor"`` key.
    """
    raw_monitor = the_config["monitor"]
    return format_script(raw_monitor)
def discovery_script():
    """Return the formatted discovery script for the module-level resource.

    Both ``resource`` and ``resources`` are read from the enclosing module;
    neither is defined in this block.
    """
    raw_discovery = discovery_script_for(resource, resources)
    return format_script(raw_discovery)