def condor(test_dir, slot_config):
    for resource in resources.keys():
        sequence = {f"{resource}{i}": j for i, j in enumerate(usages[resource])}
        discovery_script = format_script(discovery_script_for(resource, sequence))
        write_file(test_dir / f"{resource}-discovery.py", discovery_script)

        sequences = {f"{resource}{i}": j for i, j in enumerate(peaks[resource])}
        monitor_script = both_monitor_script(resource, sequence, sequences)
        write_file(test_dir / f"{resource}-monitor.py", monitor_script)

    with Condor(
        local_dir=test_dir / "condor",
        config={**slot_config, "TEST_DIR": test_dir.as_posix()},
    ) as condor:
        # Ornithology will run condor_who to verify that all the daemons are running,
        # but occasionally, not all slots will have made it to the collector
        num_slots = int(slot_config["NUM_SLOTS"])
        loop_count = 0
        while num_slots != len(
            condor.status(ad_type=htcondor.AdTypes.Startd, projection=["SlotID"])
        ):
            loop_count = loop_count + 1
            assert loop_count < 20
            time.sleep(1)
        yield condor
def condor(test_dir, slot_config):
    # set all of the concurrency limits for each slot config,
    # so that we can run all the actual job submits against the same config
    concurrency_limit_config = {
        v["config-key"]: v["config-value"] for v in CONCURRENCY_LIMITS.values()
    }

    with Condor(
        local_dir=test_dir / "condor",
        config={
            **slot_config,
            **concurrency_limit_config,
            # The negotiator determines if a concurrency limit is in use by
            # checking the machine ads, so it will overcommit tokens if its
            # cycle time is shorter than the claim-and-report cycle.
            #
            # I'm not sure why the claim-and-report cycle is so long.
            "NEGOTIATOR_INTERVAL": "4",
            "NEGOTIATOR_CYCLE_DELAY": "4",
        },
    ) as condor:
        yield condor
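# CONCURRENCY_LIMITS is defined elsewhere in the test module. A hedged sketch
# of the shape that this fixture and the other concurrency-limit fixtures
# below rely on: each entry provides at least a "config-key" and a
# "config-value". The limit names, keys, and values here are examples, not
# the original test data.
CONCURRENCY_LIMITS = {
    "named_limit": {
        "config-key": "XSW_LIMIT",
        "config-value": "4",
    },
    "default_limit": {
        "config-key": "CONCURRENCY_LIMIT_DEFAULT",
        "config-value": "2",
    },
}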
def condor(test_dir, slot_config, discovery_script, monitor_script): write_file(test_dir / "discovery", discovery_script) write_file(test_dir / "monitor", monitor_script) with Condor( local_dir=test_dir / "condor", config={ **slot_config, "TEST_DIR": test_dir.as_posix() }, ) as condor: yield condor
def condor(test_dir, slot_config, discovery_script, monitor_script): write_file(test_dir / "discovery.py", discovery_script) write_file(test_dir / "monitor.py", monitor_script) with Condor( local_dir=test_dir / "condor", config={**slot_config, "TEST_DIR": test_dir.as_posix()}, ) as condor: # try to make sure the monitor runs before we continue with the test time.sleep(MONITOR_PERIOD * 1.5) yield condor
def condor(test_dir): with Condor( local_dir=test_dir / "condor", config={ "NUM_CPUS": "10", "NUM_SLOTS": "10", # must be larger than the max number of jobs we hope to materialize "SCHEDD_MATERIALIZE_LOG": "$(LOG)/MaterializeLog", "SCHEDD_DEBUG": "D_MATERIALIZE:2 D_CAT $(SCHEDD_DEBUG)", }, ) as condor: yield condor
def condor(test_dir): with Condor( local_dir=test_dir / "condor", config={ "SEC_DEFAULT_ENCRYPTION": "required", "SEC_DEFAULT_CRYPTO_METHODS": "AES", "SHADOW_DEBUG": "D_SECURITY:2", "STARTER_DEBUG": "D_SECURITY:2" }, ) as condor: yield condor
def condor(test_dir, slot_config):
    for resource in resources.keys():
        sequence = {f"{resource}{i}": j for i, j in enumerate(usages[resource])}
        discovery_script = format_script(discovery_script_for(resource, sequence))
        write_file(test_dir / f"{resource}-discovery.py", discovery_script)

        sequences = {f"{resource}{i}": j for i, j in enumerate(peaks[resource])}
        monitor_script = both_monitor_script(resource, sequence, sequences)
        write_file(test_dir / f"{resource}-monitor.py", monitor_script)

    with Condor(
        local_dir=test_dir / "condor",
        config={**slot_config, "TEST_DIR": test_dir.as_posix()},
    ) as condor:
        yield condor
def condor(test_dir, slot_config, discovery_script, monitor_script): write_file(test_dir / "discovery.py", discovery_script) write_file(test_dir / "monitor.py", monitor_script) with Condor( local_dir=test_dir / "condor", config={**slot_config, "TEST_DIR": test_dir.as_posix()}, ) as condor: # Ornithology will run condor_who to verify that all the daemons are running, # but occasionally, not all 16 slots will have made it to the collector loop_count = 0 while 16 != len(condor.status(ad_type=htcondor.AdTypes.Startd, projection=["SlotID"])): loop_count = loop_count + 1 assert(loop_count < 20) time.sleep(1) yield condor
def condor(test_dir, daemon, path_to_exit_zero, path_to_exit_one, path_to_sig_kill):
    raw_config = f"""
{daemon}_DEBUG = D_ALWAYS
{daemon}_CRON_LOG_NON_ZERO_EXIT = TRUE

{daemon}_CRON_JOBLIST = exit_zero, exit_one, sig_kill

{daemon}_CRON_exit_zero_EXECUTABLE = {path_to_exit_zero}
{daemon}_CRON_exit_zero_MODE = OneShot
{daemon}_CRON_exit_zero_RECONFIG_RERUN = true

{daemon}_CRON_exit_one_EXECUTABLE = {path_to_exit_one}
{daemon}_CRON_exit_one_MODE = OneShot
{daemon}_CRON_exit_one_RECONFIG_RERUN = true

{daemon}_CRON_sig_kill_EXECUTABLE = {path_to_sig_kill}
{daemon}_CRON_sig_kill_MODE = OneShot
{daemon}_CRON_sig_kill_RECONFIG_RERUN = true
"""

    with Condor(test_dir / "condor", raw_config=raw_config) as condor:
        yield condor
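# The exit_zero / exit_one / sig_kill executables come from fixtures that are
# not shown here. A minimal sketch, assuming each is a tiny standalone Python
# script, of the behavior their names imply; the CronRan attribute name is an
# example only. Daemon cron jobs report attributes on stdout, and the
# *_CRON_LOG_NON_ZERO_EXIT knob above controls whether a non-zero exit gets
# logged.

# exit_zero.py -- report an attribute and exit successfully
print("CronRan = true")
raise SystemExit(0)

# exit_one.py -- report an attribute and exit with a non-zero status
print("CronRan = true")
raise SystemExit(1)

# sig_kill.py -- die from an uncaught signal instead of exiting normally
import os
import signal

os.kill(os.getpid(), signal.SIGKILL)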
def condor(test_dir, slot_config):
    # set all of the concurrency limits for each slot config,
    # so that we can run all the actual job submits against the same config
    concurrency_limit_config = {
        v["config-key"]: v["config-value"] for v in CONCURRENCY_LIMITS.values()
    }

    with Condor(
        local_dir=test_dir / "condor",
        config={
            **slot_config,
            **concurrency_limit_config,
            # make sure the negotiator runs many times within a single job duration
            "NEGOTIATOR_INTERVAL": "1",
        },
    ) as condor:
        yield condor
def condor(test_dir, slot_config, concurrency_limit):
    # We don't want to share a startd between different concurrency limit tests,
    # because doing so introduces another race condition where we need the
    # startd to be totally idle before starting the next test.
    concurrency_limit_config = {
        concurrency_limit["config-key"]: concurrency_limit["config-value"]
    }

    with Condor(
        local_dir=test_dir / "condor",
        config={
            **slot_config,
            **concurrency_limit_config,
            # The negotiator determines if a concurrency limit is in use by
            # checking the machine ads, so it will overcommit tokens if its
            # cycle time is shorter than the claim-and-report cycle.
            #
            # I'm not sure why the claim-and-report cycle is so long.
            "NEGOTIATOR_INTERVAL": "1",
            "NEGOTIATOR_MIN_INTERVAL": "1",
            "NEGOTIATOR_CYCLE_DELAY": "1",
            "UPDATE_INTERVAL": "1",
            # This MUST include D_MATCH, which is the default.
            "NEGOTIATOR_DEBUG": "D_MATCH D_CATEGORY D_SUB_SECOND",
            "SCHEDD_DEBUG": "D_FULLDEBUG",
            # Don't delay or decline to update the runnable job count for
            # any reason after receiving the reschedule command.
            "SCHEDD_MIN_INTERVAL": "0",
            "SCHEDD_INTERVAL_TIMESLICE": "1",
        },
    ) as condor:
        yield condor
def condor(condor_config, test_dir):
    with Condor(test_dir / "condor", **condor_config) as condor:
        yield condor
def failure_injection_condor(test_dir, failure_injection_config):
    with Condor(test_dir / "condor", **failure_injection_config) as condor:
        yield condor
def successful_condor(successful_condor_config, test_dir):
    with Condor(test_dir / "condor", **successful_condor_config) as condor:
        yield condor
def default_condor(test_dir):
    with Condor(local_dir=test_dir / "condor") as condor:
        yield condor
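# A sketch, not from the original test files, of how a test might consume one
# of these fixtures. The test name and the asserted condition are assumptions;
# condor.status() and htcondor.AdTypes.Startd are used exactly as in the
# fixtures above.
import htcondor


def test_startd_reports_to_collector(default_condor):
    # Ask the personal condor started by the fixture for its startd slot ads.
    ads = default_condor.status(
        ad_type=htcondor.AdTypes.Startd, projection=["SlotID"]
    )
    assert len(ads) > 0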
def condor(test_dir): with Condor(local_dir=test_dir / "condor", config={"PERIODIC_EXPR_INTERVAL": PERIODIC_EXPR_INTERVAL}) as condor: yield condor