def run_case_set_against_ai(request):
    """Run every case in a stored case set against one AI implementation.

    Args:
        request: Mapping with keys ``caseSetId``, ``aiImplementation`` and
            ``runName`` (presumably a parsed request payload — schema not
            visible here, TODO confirm against caller).

    Returns:
        dict with ``runId`` (unique id of this run) and ``results`` (one
        entry per case: the AI's JSON response, or ``None`` on failure).

    Raises:
        AssertionError: if ``aiImplementation`` is unknown.

    Side effects: writes ``meta.json`` and ``results.json`` under
    ``data/<case_set_id>/<run_hash>/``.
    """
    case_set_id = parse_validate_caseSetId(request["caseSetId"])
    ai_implementation = request["aiImplementation"]
    run_name = request["runName"]
    assert ai_implementation in AI_TYPES_TO_LOCATIONS
    ai_location_path = AI_TYPES_TO_LOCATIONS[ai_implementation]
    run_hash = get_unique_id()
    path = os.path.join(FILE_DIR, "data", case_set_id, run_hash)
    # FIX: close the cases file deterministically (original leaked the handle).
    with open(os.path.join(FILE_DIR, "data", case_set_id, "cases.json")) as f:
        cases = json.load(f)
    results = []
    for case in cases:
        # FIX: the original rebound the `request` parameter to the HTTP
        # response object here; use a distinct name.
        try:
            response = requests.post(
                ai_location_path,
                json={
                    "aiImplementation": ai_implementation,
                    "caseData": case["caseData"],
                },
                timeout=TIMEOUT,
            )
        except requests.exceptions.RequestException:
            # FIX: a timeout/connection error previously propagated and
            # aborted the whole run; the surrounding try clearly intends
            # "record None on failure and continue", so do that here too.
            result = None
        else:
            # Non-200 responses are recorded as None (original used
            # `assert status == 200` + `except AssertionError`).
            result = response.json() if response.status_code == 200 else None
        results.append(result)
    create_dirs(path)
    with open(os.path.join(path, "meta.json"), "w") as f:
        json.dump(
            {
                "ai_location_path": ai_location_path,
                "ai_implementation": ai_implementation,
                "run_name": run_name,
            },
            f,
            indent=2,
        )
    with open(os.path.join(path, "results.json"), "w") as f:
        json.dump(results, f, indent=2)
    return {"runId": run_hash, "results": results}
def generate_case_set(request):
    """Generate a new case set by fetching ``numCases`` cases from the
    case-generation service and persisting them to disk.

    Args:
        request: Mapping with key ``numCases`` (coercible to int, 1..200).

    Returns:
        dict with ``case_set_id`` — the unique id of the stored set.

    Raises:
        ValueError: if ``numCases`` is outside 1..200.
        AssertionError: if the case-generator service returns a non-200.

    Side effects: writes ``cases.json`` under ``data/<case_set_id>/``.
    """
    num_cases = int(request["numCases"])
    # FIX (resolves the old TODO "Gracefully fail for >200 cases"): raise a
    # real exception instead of a bare assert, which is stripped under -O.
    if not 0 < num_cases <= 200:
        raise ValueError("numCases must be between 1 and 200")
    cases = []
    for _ in range(num_cases):
        # FIX: the original rebound the `request` parameter to the HTTP
        # response inside this loop; use a distinct name.
        response = requests.get(
            SERVER_HOST_FOR_CASE_GENERATION
            + "/case-generator/v1/generate-case")
        assert response.status_code == 200
        cases.append(response.json())
    case_set_id = get_unique_id()
    path = os.path.join(FILE_DIR, "data", case_set_id)
    create_dirs(path)
    # FIX: close the output file deterministically (original leaked the handle).
    with open(os.path.join(path, "cases.json"), "w") as f:
        json.dump(cases, f, indent=2)
    return {"case_set_id": case_set_id}
def _run_benchmark(self):
    """Run the full benchmark: for each case, health-check all runner
    processes, dispatch the case to the healthy ones, and collect results.

    Communication with runners goes over per-runner pipes (send) and a
    shared ``self.result_queue`` (receive); each runner signals completion
    of a batch with a SENTINEL message.

    Returns:
        On success: ``{'benchmark_id': ..., 'results': {case_id: {ai_name:
        result}}}``. If every AI fails a health check, returns early with
        ``{'runId': ..., 'results': ...}``.
        NOTE(review): the early-return key 'runId' differs from the normal
        'benchmark_id' key — looks unintentional, but callers may depend on
        it, so it is preserved here. TODO confirm.
    """
    self.__state = ManagerStatuses.RUNNING
    results = {}
    burnt_cases_path = os.path.join(DATA_DIR, 'burnt_cases')
    create_dirs(burnt_cases_path)
    message = f'Starting run of benchmark with id {self.benchmark_id}'
    self.accumulated_logs.append(message)
    logger.info(message)
    for case_num, case in enumerate(self.case_set):
        case_index = case_num + 1
        case_id = case['caseData']['caseId']
        results[case_id] = {}
        self.manager_report = self.db_client.update_manager_report(
            case_index, case_id, self.manager_report)
        # Shuffle so runners are not always contacted in the same order.
        runners_pipes = [pipe for (_, pipe) in self.runners_pool]
        random.shuffle(runners_pipes)
        message = f'Starting health checks for case #{case_index}...'
        self.accumulated_logs.append(message)
        logger.info(message)
        for pipe in runners_pipes:
            pipe.send((ProcessSignal.HEALTH_CHECK, {'case_id': case_id}))
        # Drain health-check responses until every runner has sent its
        # SENTINEL.
        sentinels = 0
        healthchecked_ai_ids = []
        while sentinels < len(self.runners_pool):
            signal, runner_id, result = self.result_queue.get()
            if signal == ProcessSignal.SENTINEL:
                sentinels += 1
            elif signal == ProcessSignal.HEALTH_CHECK:
                self.db_client.create_ai_report(
                    self.manager_report, result['ai_name'], case_id,
                    result['report']
                )
                if result['healthy']:
                    healthchecked_ai_ids.append(runner_id)
                else:
                    # in this case we need to add the failed ai to the
                    # results as not healthchecked successfully
                    output = {
                        'ai_name': result['ai_name'],
                        'result': result['result'],
                        'error': result['error'],
                        'case_status': result['case_status'],
                        'soft_timeout': result['soft_timeout'],
                        'hard_timeout': result['hard_timeout'],
                        'healthchecked': result['healthchecked'],
                    }
                    results[case_id][result['ai_name']] = output
                if result['log']:
                    for log in result['log']:
                        self.accumulated_logs.append(log)
        if healthchecked_ai_ids:
            message = (
                'The following AIs have passed the health check for case '
                + f'#{case_index}: '
                + ", ".join([
                    self.runners_pool[id_][0].ai_name
                    for id_ in healthchecked_ai_ids
                ])
            )
            self.accumulated_logs.append(message)
            logger.info(message)
            # 'marks' case as 'burnt'
            case_burnt_path = os.path.join(burnt_cases_path, case_id)
            open(case_burnt_path, 'w').close()
        else:
            # if all healthchecks have failed, what to do?
            # TODO: implement rule for when all healthchecks have failed
            message = (
                f'All AIs have failed the health check for '
                f'case #{case_index}'
            )
            self.accumulated_logs.append(message)
            logger.error(message)
            return {'runId': self.benchmark_id, 'results': results}
        for id_ in healthchecked_ai_ids:
            pipe = self.runners_pool[id_][1]
            pipe.send((ProcessSignal.SOLVE_CASE, {'case': case}))
        # Drain solve-case responses from the healthy runners.
        sentinels = 0
        while sentinels < len(healthchecked_ai_ids):
            signal, runner_id, result = self.result_queue.get()
            if signal == ProcessSignal.SENTINEL:
                sentinels += 1
            elif signal == ProcessSignal.SOLVE_CASE:
                logs = result.pop('log')
                for log in logs:
                    # BUG FIX: the original appended the whole `logs` list
                    # once per entry; append each entry instead.
                    self.accumulated_logs.append(log)
                self.db_client.update_ai_report(
                    self.manager_report, result['ai_name'], case_id,
                    case_status=result['case_status'],
                    error=result['error'],
                    soft_timeout=result['soft_timeout'],
                    hard_timeout=result['hard_timeout']
                )
                results[case_id][result['ai_name']] = result
    message = (
        f'Finished running benchmark with id {self.benchmark_id} '
        f'and case set id {self.case_set_id}'
    )
    self.accumulated_logs.append(message)
    logger.info(message)
    self.finish_execution()
    return {'benchmark_id': self.benchmark_id, 'results': results}
def _run_benchmark(self):
    """Run the full benchmark: for each case, health-check all runner
    processes, dispatch the case to the healthy ones, and collect results.

    Communication with runners goes over per-runner pipes (send) and a
    shared ``self.result_queue`` (receive); each runner signals completion
    of a batch with a SENTINEL message.

    Returns:
        On success: ``{"benchmark_id": ..., "results": {case_id: {ai_name:
        result}}}``. If every AI fails a health check, returns early with
        ``{"runId": ..., "results": ...}``.
        NOTE(review): the early-return key "runId" differs from the normal
        "benchmark_id" key — looks unintentional, but callers may depend on
        it, so it is preserved here. TODO confirm.
    """
    self.__state = ManagerStatuses.RUNNING
    results = {}
    burnt_cases_path = os.path.join(DATA_DIR, "burnt_cases")
    create_dirs(burnt_cases_path)
    message = f"Starting run of benchmark with id {self.benchmark_id}"
    self.accumulated_logs.append(message)
    logger.info(message)
    for case_num, case in enumerate(self.case_set):
        case_index = case_num + 1
        case_id = case["caseData"]["caseId"]
        results[case_id] = {}
        self.manager_report = self.db_client.update_manager_report(
            case_index, case_id, self.manager_report)
        # Shuffle so runners are not always contacted in the same order.
        runners_pipes = [pipe for (_, pipe) in self.runners_pool]
        random.shuffle(runners_pipes)
        message = f"Starting health checks for case #{case_index}..."
        self.accumulated_logs.append(message)
        logger.info(message)
        for pipe in runners_pipes:
            pipe.send((ProcessSignal.HEALTH_CHECK, {"case_id": case_id}))
        # Drain health-check responses until every runner has sent its
        # SENTINEL.
        sentinels = 0
        healthchecked_ai_ids = []
        while sentinels < len(self.runners_pool):
            signal, runner_id, result = self.result_queue.get()
            if signal == ProcessSignal.SENTINEL:
                sentinels += 1
            elif signal == ProcessSignal.HEALTH_CHECK:
                self.db_client.create_ai_report(
                    self.manager_report,
                    result["ai_name"],
                    case_id,
                    result["report"],
                )
                if result["healthy"]:
                    healthchecked_ai_ids.append(runner_id)
                else:
                    # in this case we need to add the failed ai to the
                    # results as not healthchecked successfully
                    output = {
                        "ai_name": result["ai_name"],
                        "result": result["result"],
                        "error": result["error"],
                        "case_status": result["case_status"],
                        "soft_timeout": result["soft_timeout"],
                        "hard_timeout": result["hard_timeout"],
                        "healthchecked": result["healthchecked"],
                    }
                    results[case_id][result["ai_name"]] = output
                if result["log"]:
                    for log in result["log"]:
                        self.accumulated_logs.append(log)
        if healthchecked_ai_ids:
            message = (
                "The following AIs have passed the health check for case "
                + f"#{case_index}: "
                + ", ".join([
                    self.runners_pool[id_][0].ai_name
                    for id_ in healthchecked_ai_ids
                ]))
            self.accumulated_logs.append(message)
            logger.info(message)
            # 'marks' case as 'burnt'
            case_burnt_path = os.path.join(
                burnt_cases_path, case_id + "_" + str(case_num))
            open(case_burnt_path, "w").close()
        else:
            # if all healthchecks have failed, what to do?
            # TODO: implement rule for when all healthchecks have failed
            message = (f"All AIs have failed the health check for "
                       f"case #{case_index}")
            self.accumulated_logs.append(message)
            logger.error(message)
            return {"runId": self.benchmark_id, "results": results}
        # Shuffle again so case dispatch order differs from check order.
        random.shuffle(healthchecked_ai_ids)
        for id_ in healthchecked_ai_ids:
            pipe = self.runners_pool[id_][1]
            pipe.send((ProcessSignal.SOLVE_CASE, {"case": case}))
        # Drain solve-case responses from the healthy runners.
        sentinels = 0
        while sentinels < len(healthchecked_ai_ids):
            signal, runner_id, result = self.result_queue.get()
            if signal == ProcessSignal.SENTINEL:
                sentinels += 1
            elif signal == ProcessSignal.SOLVE_CASE:
                logs = result.pop("log")
                for log in logs:
                    # BUG FIX: the original appended the whole `logs` list
                    # once per entry; append each entry instead.
                    self.accumulated_logs.append(log)
                self.db_client.update_ai_report(
                    self.manager_report,
                    result["ai_name"],
                    case_id,
                    case_status=result["case_status"],
                    error=result["error"],
                    soft_timeout=result["soft_timeout"],
                    hard_timeout=result["hard_timeout"],
                )
                results[case_id][result["ai_name"]] = result
    message = (f"Finished running benchmark with id {self.benchmark_id} "
               f"and case set id {self.case_set_id}")
    self.accumulated_logs.append(message)
    logger.info(message)
    self.finish_execution()
    return {"benchmark_id": self.benchmark_id, "results": results}