def run_indri(args, output, overwrite_threads=False):
    """Run a command as a subprocess and save its stdout to ``output``.

    Judging by the function name the command is presumably an Indri tool,
    but nothing here depends on that beyond the ``-threads`` option.

    The shared ``cancel`` Variable is checked once before the process is
    started and again after every 1000 lines of captured output, so a
    running job can be stopped cooperatively.

    Args:
        args: Command line as a sequence; ``args[0]`` is the executable.
        output: Path of the file that receives the captured stdout
            (opened in binary mode, written only after the process ends).
        overwrite_threads: When true, a ``-threads=N`` option is inserted
            right after the executable, where N is the number of CPUs this
            process may run on minus one, but never less than 1.

    Returns:
        A tuple ``(status, worker_address, elapsed_seconds, loadinfo)``.
        ``status`` is ``'canceled'`` (process never started, elapsed is 0),
        ``'killed'`` (process stopped mid-run, ``output`` is not written)
        or ``'completed'``.
    """
    import os
    from subprocess import DEVNULL, PIPE, Popen

    cancel = Variable('cancel', get_client())
    if cancel.get():
        return ('canceled', get_worker().address, 0, get_loadinfo())

    start = time.time()
    if overwrite_threads:
        # Leave one CPU free, but never request fewer than one thread
        # (a single-CPU affinity mask would otherwise yield -threads=0).
        processes = max(1, len(os.sched_getaffinity(0)) - 1)
        args = (args[0], '-threads={}'.format(processes), *args[1:])

    # stderr is never consumed here, so it is discarded instead of piped:
    # an unread pipe blocks the child once the OS pipe buffer fills up.
    with Popen(args, stdout=PIPE, stderr=DEVNULL) as proc:
        content = []
        for line in proc.stdout:
            content.append(line)
            # Only poll the cancel flag every 1000 lines to keep it cheap.
            if len(content) % 1000 != 0:
                continue
            if cancel.get():
                proc.kill()
                return ('killed', get_worker().address,
                        time.time() - start, get_loadinfo())

    with open(output, 'wb') as f:
        f.writelines(content)
    return ('completed', get_worker().address,
            time.time() - start, get_loadinfo())
class ClusterShareMemory(ShareMemory):
    """Shared-memory slot backed by a dask distributed ``Variable``.

    Values are stored as their ``str()`` form and parsed back with
    ``ast.literal_eval`` when read.
    """

    def __init__(self, name):
        """Bind to the dask Variable called ``name`` on the shared client."""
        from dask.distributed import Variable

        self.var = Variable(
            name,
            client=ShareMemoryClient().client,
        )

    def put(self, value):
        """Store ``value`` in the shared variable as a string."""
        serialized = str(value)
        self.var.set(serialized)

    def get(self):
        """Read the shared variable and return the parsed value."""
        # TODO: blocks when the variable holds no data yet; the wait is
        # limited by the 2 second timeout passed below.
        raw_value = self.var.get(timeout=2)
        return ast.literal_eval(raw_value)

    def delete(self):
        """Delete the shared variable."""
        self.var.delete()

    def close(self):
        """Close the shared-memory client."""
        ShareMemoryClient().close()
def _assign_item_names(items, label, test_config):
    """Check every item has a 'type' and give unnamed items a generated name.

    Args:
        items: Action or assert dicts from the test configuration. Items
            without a "name" get the generated name stored under "name".
        label: "Action" or "Assert"; only used in the error message.
        test_config: Test configuration; its "name" appears in the error
            message when an item has no "type".

    Returns:
        The names of all items, in order.
    """
    names = []
    for item in items:
        assert (
            "type" in item
        ), f"{label} in test '{test_config['name']}' is missing property 'type'"
        item_name = item.get("name")
        if item_name is None:
            item_name = create_item_name(item["type"], names)
            # NOTE: sets item name if not set
            item["name"] = item_name
        names.append(item_name)
    return names


def run_test(
    test_config: TestConfig,
    incoming_state: dict,
    hostnames: List[str],
    timeout_signal_name: str = None,
) -> dict:
    """
    Runs actions and asserts in provided test and returns new state with finished actions/asserts

    Each cycle runs the actions (distributed "parallel" unless
    "actionDistributionStrategy" says "series") and then the asserts
    (distributed "series" unless "assertDistributionStrategy" says
    "parallel"). Cycles repeat while continue_running() allows it, with a
    pause of "secondsBetweenCycles" (default 1) between cycles. A TestSummary
    is stored under the test's "summary" key before returning.

    Args:
        test_config: test configuration to run. Actions/asserts without a
            "name" get a generated one written back into the config.
        incoming_state: Initial state of test (does not modify)
        hostnames: Addresses of runners to run actions/asserts on
        timeout_signal_name: Optional Dask variable to check if test has timed out
            so it can end gracefully

    Returns:
        New state after running actions and asserts

    Raises:
        AssertionError: if an action/assert has no "type", hostnames is empty,
            action or assert names are not unique, or a distribution strategy
            is neither "parallel" nor "series".
    """
    actions = test_config.get("actions", [])
    asserts = test_config.get("asserts", [])
    default_cycles = get_default_cycles(actions, asserts)
    remaining_cycles = test_config.get("cycles", default_cycles)
    completed_cycles = 0

    # NOTE: possibly use infinite default dict
    state = defaultdict(dict, incoming_state)

    # Validate test before running
    action_names = _assign_item_names(actions, "Action", test_config)
    assert_names = _assign_item_names(asserts, "Assert", test_config)

    assert hostnames, "Must have at least one host to run tests"
    assert len(set(action_names)) == len(
        action_names
    ), "Action names if specified must be unique"
    assert len(set(assert_names)) == len(
        assert_names
    ), "Assert names if specified must be unique"

    test_name = test_config["name"]
    # The defaultdict above is only a shallow copy, so this test's entry may
    # still be the caller's dict. Copy it before writing "actions", "asserts"
    # and "summary" so incoming_state really is left unmodified.
    state[test_name] = dict(state[test_name])

    start_time = datetime.now()

    # stop if remaining_cycles == 0 or had asserts and no asserts remain
    while continue_running(
        asserts, remaining_cycles, state[test_name].get("asserts", {})
    ):
        # Check if running with a timeout and break if timeout has signaled
        if timeout_signal_name is not None:
            keep_going = Variable(timeout_signal_name, client=get_client())
            if not keep_going.get():
                break

        # NOTE: exceptions thrown in actions/asserts cause rest of test to exit
        action_distribution_strategy = test_config.get(
            "actionDistributionStrategy", "parallel"
        )
        if actions:
            assert action_distribution_strategy in [
                "parallel",
                "series",
            ], f"actionDistributionStrategy must be 'parallel' or 'series', got '{action_distribution_strategy}'"
            if action_distribution_strategy == "series":
                run_actions_func = run_actions_series
            else:
                run_actions_func = run_actions_parallel
            state[test_name]["actions"] = run_actions_func(
                actions,
                state,
                test_name,
                hostnames,
                test_config.get("secondsBetweenActions", 0),
            )

        assert_distribution_strategy = test_config.get(
            "assertDistributionStrategy", "series"
        )
        if asserts:
            assert assert_distribution_strategy in [
                "parallel",
                "series",
            ], f"assertDistributionStrategy must be 'parallel' or 'series', got '{assert_distribution_strategy}'"
            if assert_distribution_strategy == "parallel":
                run_asserts_func = run_asserts_parallel
            else:
                run_asserts_func = run_asserts_series
            state[test_name]["asserts"] = run_asserts_func(
                asserts,
                state,
                test_name,
                hostnames,
                test_config.get("secondsBetweenAsserts", 0),
            )

        remaining_cycles -= 1
        completed_cycles += 1

        # Wait between cycles if test is to continue running
        if continue_running(
            asserts, remaining_cycles, state[test_name].get("asserts", {})
        ):
            time.sleep(test_config.get("secondsBetweenCycles", 1))

    remaining_asserts = get_remaining_asserts(
        asserts, state[test_name].get("asserts", {})
    )
    state[test_name]["summary"] = TestSummary(
        description=test_config.get("description"),
        completed_cycles=completed_cycles,
        remaining_asserts=[asrt["name"] for asrt in remaining_asserts],
        error=None,
        # total_seconds() covers the whole elapsed time; timedelta.seconds is
        # only the seconds component and wraps around after one day.
        duration=int((datetime.now() - start_time).total_seconds()),
    )
    return state