def test_capture_output(self):
    """Check the output attributes of wrappers created with capture_output=True.

    Two helpers are wrapped with ``pynisher.enforce_limits`` (2 s wall time,
    1 s grace period): ``print_and_sleep`` and ``print_and_fail``.  After each
    call the wrapper's ``stdout``, ``stderr`` and ``exitcode`` attributes are
    inspected.
    """
    print("Testing capturing of output.")

    time_limit = 2
    grace_period = 1

    wrapped_function = pynisher.enforce_limits(
        wall_time_in_s=time_limit,
        mem_in_mb=None,
        context=multiprocessing.get_context(context),
        grace_period_in_s=grace_period,
        logger=self.logger,
        capture_output=True,
    )(print_and_sleep)

    wrapped_function(5)

    # assertIn (instead of assertTrue(x in y)) reports both operands on failure
    # and matches the assertions used for the failing helper below.
    self.assertIn('0', wrapped_function.stdout)
    self.assertEqual(wrapped_function.stderr, '')
    self.assertEqual(wrapped_function.exitcode, 0)

    wrapped_function = pynisher.enforce_limits(
        wall_time_in_s=time_limit,
        mem_in_mb=None,
        context=multiprocessing.get_context(context),
        grace_period_in_s=grace_period,
        logger=self.logger,
        capture_output=True,
    )(print_and_fail)

    wrapped_function()

    self.assertIn('0', wrapped_function.stdout)
    self.assertIn('RuntimeError', wrapped_function.stderr)
    self.assertEqual(wrapped_function.exitcode, 1)
def test_capture_output(self):
    """Check that stdout and stderr of a limited function are captured.

    Two local helpers are wrapped with ``pynisher.enforce_limits`` using
    ``capture_output=True``; afterwards the ``stdout`` / ``stderr`` attributes
    of the wrapper are inspected.
    """
    print("Testing capturing of output.")

    time_limit = 2
    grace_period = 1

    def print_and_sleep(t):
        # Prints one number per second for ``t`` seconds.
        for i in range(t):
            print(i)
            time.sleep(1)

    wrapped_function = pynisher.enforce_limits(
        wall_time_in_s=time_limit,
        mem_in_mb=None,
        grace_period_in_s=grace_period,
        logger=logger,
        capture_output=True,
    )(print_and_sleep)

    wrapped_function(5)

    # Dedicated assert methods give readable failure messages.
    self.assertIn('0', wrapped_function.stdout)
    self.assertEqual(wrapped_function.stderr, '')

    def print_and_fail():
        # Prints once, then raises so that a traceback is produced.
        print(0)
        raise RuntimeError()

    wrapped_function = pynisher.enforce_limits(
        wall_time_in_s=time_limit,
        mem_in_mb=None,
        grace_period_in_s=grace_period,
        logger=logger,
        capture_output=True,
    )(print_and_fail)

    wrapped_function()

    self.assertIn('0', wrapped_function.stdout)
    self.assertIn('RuntimeError', wrapped_function.stderr)
def test_liblinear_svc(self):
    """Run ``svc_example`` under a CPU-time limit and inspect the aftermath.

    Verifies that no child processes remain, that exactly two debug messages
    were logged on the mocked logger, that the call duration lies between the
    limit and limit + grace period (0.1 s slack), and that the exit code is -9.
    """
    global logger

    cpu_budget = 2
    grace = 1
    debug_spy = unittest.mock.Mock()

    limiter = pynisher.enforce_limits(
        cpu_time_in_s=cpu_budget,
        mem_in_mb=None,
        grace_period_in_s=grace,
        logger=logger,
    )
    # Swap in the mock after construction so the debug calls can be counted.
    limiter.logger = debug_spy
    limited_svc = limiter(svc_example)

    began = time.time()
    limited_svc(16384, 10000)
    elapsed = time.time() - began

    time.sleep(1)
    current_process = psutil.Process()
    self.assertEqual(len(current_process.children(recursive=True)), 0)

    self.assertEqual(debug_spy.debug.call_count, 2)
    recorded = debug_spy.debug.call_args_list
    self.assertEqual(
        recorded[0][0][0],
        'Function called with argument: (16384, 10000), {}',
    )
    self.assertEqual(
        recorded[1][0][0],
        'Your function call closed the pipe prematurely -> '
        'Subprocess probably got an uncatchable signal.',
    )

    # self.assertEqual(wrapped_function.exit_status, pynisher.CpuTimeoutException)
    self.assertGreater(elapsed, cpu_budget - 0.1)
    self.assertLess(elapsed, cpu_budget + grace + 0.1)
    self.assertEqual(limited_svc.exitcode, -9)
def run(self):
    """Run ``self.main`` under pynisher limits, retrying after memory errors.

    Whenever the wrapped call ends with ``pynisher.MemorylimitException`` the
    value of ``self.ensemble_nbest`` is halved and ``self.main`` is started
    again.  The loop ends on any other exit status, or once
    ``self.ensemble_nbest`` has reached 1.
    """
    buffer_time = 5  # TODO: Buffer time should also be used in main!?

    while True:
        remaining = self.time_limit - buffer_time
        limiter = pynisher.enforce_limits(
            wall_time_in_s=int(remaining),
            mem_in_mb=self.memory_limit,
            logger=self.logger,
        )
        guarded_main = limiter(self.main)
        guarded_main()

        if guarded_main.exit_status is not pynisher.MemorylimitException:
            break

        # The ensemble script died because of a memory error: reduce nbest
        # to lower the memory consumption and try again.
        if self.ensemble_nbest == 1:
            self.logger.critical(
                "Memory Exception -- Unable to escape from memory exception"
            )
            break

        self.ensemble_nbest = int(self.ensemble_nbest / 2)
        self.logger.warning(
            "Memory Exception -- restart with less ensemle_nbest: %d"
            % (self.ensemble_nbest))
        # ATTENTION: main will start from scratch;
        # all data structures are empty again
def run(self):
    """Call ``self.run_smbo`` wrapped in pynisher memory and wall-time limits."""
    # we use pynisher here to enforce limits
    limiter = pynisher.enforce_limits(
        mem_in_mb=self.memory_limit,
        wall_time_in_s=int(self.total_walltime_limit),
        grace_period_in_s=5,
    )
    guarded_smbo = limiter(self.run_smbo)
    guarded_smbo(max_iters=self.smac_iters)
def test_crash_unexpectedly(self):
    """Call ``crash_unexpectedly`` with SIGQUIT and check the reported status."""
    print("Testing an unexpected signal simulating a crash.")

    limiter = pynisher.enforce_limits()
    guarded = limiter(crash_unexpectedly)

    outcome = guarded(signal.SIGQUIT)
    self.assertIsNone(outcome)
    self.assertEqual(guarded.exit_status, pynisher.SignalException)
    self.assertEqual(guarded.exitcode, 0)
def test_time_out(self):
    """Check that a 1 s wall-time limit yields ``TimeoutException``.

    ``simulate_work`` is called nine times with a 10 s duration argument; each
    call must return ``None`` and report a timeout.  The accepted exit codes
    depend on the interpreter version.
    """
    print("Testing wall clock time constraint.")

    limits = dict(
        mem_in_mb=None,
        wall_time_in_s=1,
        cpu_time_in_s=None,
        grace_period_in_s=None,
    )
    guarded = pynisher.enforce_limits(
        mem_in_mb=limits['mem_in_mb'],
        wall_time_in_s=limits['wall_time_in_s'],
        context=multiprocessing.get_context(context),
        logger=self.logger,
        cpu_time_in_s=limits['cpu_time_in_s'],
        grace_period_in_s=limits['grace_period_in_s'],
    )(simulate_work)

    for mem in range(1, 10):
        self.assertIsNone(guarded(mem, 10, 0))
        self.assertEqual(guarded.exit_status, pynisher.TimeoutException,
                         str(guarded.result))
        if sys.version_info < (3, 7):
            # Apparently, the exit code here is not deterministic
            # (so far only Python 3.6).
            # In the case of python 3.6 forkserver/spawn we get a 255/-15
            accepted_exitcodes = (-15, 255)
        else:
            accepted_exitcodes = (-15, 0)
        self.assertIn(guarded.exitcode, accepted_exitcodes)
def test_big_return_data(self):
    """Check that return values of growing size come back with full length."""
    print("Testing big return values")

    guarded = pynisher.enforce_limits()(return_big_array)

    # Sizes are the powers of four from 4 up to 262144.
    sizes = [4 ** exponent for exponent in range(1, 10)]
    for num_elements in sizes:
        returned = guarded(num_elements)
        self.assertEqual(len(returned), num_elements)
def test_too_little_memory(self):
    """Run ``simulate_work`` with a 1 MB memory limit and a large argument.

    The call must leave ``result`` as ``None`` and report either a
    ``SubprocessException`` or a ``MemorylimitException``.
    """
    # Test what happens if the target process does not have a sufficiently
    # high memory limit

    # 2048 MB
    dummy_content = [42.] * ((1024 * 2048) // 8)  # noqa

    limiter = pynisher.enforce_limits(
        mem_in_mb=1,
        context=multiprocessing.get_context(context),
        logger=self.logger,
    )
    guarded = limiter(simulate_work)

    guarded(size_in_mb=1000, wall_time_in_s=10, num_processes=1,
            dummy_content=dummy_content)

    self.assertIsNone(guarded.result)

    # The following is a bit weird, on my local machine I get a
    # SubprocessException, but on travis-ci I get a MemoryLimitException
    tolerated = (pynisher.SubprocessException, pynisher.MemorylimitException)
    self.assertIn(guarded.exit_status, tolerated)

    # This is triggered on my local machine, but not on travis-ci
    if guarded.exit_status == pynisher.SubprocessException:
        self.assertEqual(guarded.os_errno, 12)

    self.assertEqual(guarded.exitcode, 0)
def nested_pynisher(level=2, cputime=5, walltime=5, memlimit=10e24, increment=-1, grace_period=1):
    """Recursively wrap this function in pynisher limits, ``level`` times.

    At level 0 ``spawn_rogue_subprocess(10)`` is called.  At every other level
    the function wraps itself with the given limits and calls the wrapper one
    level lower, with no CPU limit and the wall time changed by ``increment``.
    ``grace_period`` is not forwarded, so inner levels use the default value.
    """
    print("this is level {}".format(level))

    if level == 0:
        spawn_rogue_subprocess(10)
        return

    limiter = pynisher.enforce_limits(
        mem_in_mb=memlimit,
        cpu_time_in_s=cputime,
        wall_time_in_s=walltime,
        grace_period_in_s=grace_period,
    )
    next_level = limiter(nested_pynisher)
    next_level(level - 1, None, walltime + increment, memlimit, increment)
def run(self):
    """Run ``self.main`` under pynisher limits, retrying after memory errors.

    When the wrapped call ends with ``pynisher.MemorylimitException``,
    ``self.ensemble_nbest`` is halved (integer halving for integral values,
    plain division otherwise) and ``self.main`` is started again.  The loop
    ends on any other exit status, or once an integral ``ensemble_nbest`` has
    reached 1.
    """
    buffer_time = 5  # TODO: Buffer time should also be used in main!?

    while True:
        time_left = self.time_limit - buffer_time
        safe_ensemble_script = pynisher.enforce_limits(
            wall_time_in_s=int(time_left),
            mem_in_mb=self.memory_limit,
            logger=self.logger,
        )(self.main)
        safe_ensemble_script()

        if safe_ensemble_script.exit_status is not pynisher.MemorylimitException:
            break

        # The ensemble script died because of a memory error: reduce nbest
        # to lower the memory consumption and try again.
        nbest_is_integral = isinstance(self.ensemble_nbest, numbers.Integral)
        if nbest_is_integral and self.ensemble_nbest == 1:
            self.logger.critical(
                "Memory Exception -- Unable to further reduce the number of ensemble "
                "members -- please restart Auto-sklearn with a higher value for the "
                "argument 'ensemble_memory_limit' (current limit is {} MB)."
                "".format(self.memory_limit))
            break

        if nbest_is_integral:
            self.ensemble_nbest = int(self.ensemble_nbest / 2)
        else:
            self.ensemble_nbest = self.ensemble_nbest / 2

        # %s instead of %d: a fractional ensemble_nbest (e.g. 0.25) would be
        # truncated to 0 by %d, making the log message misleading.
        self.logger.warning("Memory Exception -- restart with "
                            "less ensemble_nbest: %s" % self.ensemble_nbest)
        # ATTENTION: main will start from scratch;
        # all data structures are empty again
def test_kill_subprocesses(self):
    """Check that no child processes remain after a wall-time limited call."""
    limiter = pynisher.enforce_limits(wall_time_in_s=1)
    guarded = limiter(spawn_rogue_subprocess)
    guarded(5)

    time.sleep(1)

    remaining_children = psutil.Process().children(recursive=True)
    self.assertEqual(len(remaining_children), 0)
def test_big_return_data(self):
    """Verify that results of increasing length are returned completely."""
    print("Testing big return values")

    limiter = pynisher.enforce_limits()
    guarded = limiter(return_big_array)

    element_counts = (4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144)
    for num_elements in element_counts:
        self.assertEqual(len(guarded(num_elements)), num_elements)
def test_capture_output_error(self):
    """Check the wrapper state when captured output cannot be recovered.

    A mocked multiprocessing context is passed in; afterwards the logged error
    message, the ``stdout`` / ``stderr`` attributes, the exit status and the
    return value are checked.
    """
    grace_period = 1

    # We want to mimic a scenario where the context.Pipe
    # fails early, so that a stdout file was not created.
    context = unittest.mock.Mock()
    logger_mock = unittest.mock.Mock()
    first_end, second_end = unittest.mock.Mock(), unittest.mock.Mock()
    context.Pipe.return_value = (first_end, second_end)
    first_end._side_effect = ValueError()

    limiter = pynisher.enforce_limits(
        wall_time_in_s=1,
        mem_in_mb=None,
        context=context,
        grace_period_in_s=grace_period,
        logger=logger_mock,
        capture_output=True,
    )
    guarded = limiter(print_and_sleep)
    return_value = guarded(5)

    # On failure, the log file will catch the error msg
    logged_error = str(logger_mock.error.call_args)
    self.assertIn('Cannot recover the output from', logged_error)

    # And the stdout/stderr attributes will be left as None
    self.assertIsNone(guarded.stdout)
    self.assertIsNone(guarded.stderr)

    # Also check the return value
    self.assertEqual(guarded.exit_status, 5)
    self.assertIsNone(return_value)
def test_high_cpu_percentage(self):
    """Check that ``cpu_usage`` under a 2 s CPU limit reports ``CpuTimeoutException``."""
    print("Testing cpu time constraint.")

    cpu_time_in_s = 2
    grace_period = 1

    wrapped_function = pynisher.enforce_limits(
        cpu_time_in_s=cpu_time_in_s,
        grace_period_in_s=grace_period,
    )(cpu_usage)

    # assertIsNone is the dedicated check for None and matches the other tests.
    self.assertIsNone(wrapped_function())
    self.assertEqual(wrapped_function.exit_status, pynisher.CpuTimeoutException)
def test_pynisher_memory_error(self):
    """Check that a large allocation under a 1 MB limit reports ``MemorylimitException``."""

    def fill_memory():
        # Allocates a 10000 x 10000 float64 array and sums it.
        return np.sum(
            np.random.random_sample((10000, 10000)).astype(np.float64)
        )

    limiter = pynisher.enforce_limits(mem_in_mb=1)
    guarded = limiter(fill_memory)
    guarded()

    self.assertEqual(guarded.exit_status, pynisher.MemorylimitException)
def test_pynisher_timeout(self):
    """Check that a 2 s sleep under a 1 s wall-time limit reports ``TimeoutException``."""

    def run_over_time():
        time.sleep(2)

    limiter = pynisher.enforce_limits(wall_time_in_s=1, grace_period_in_s=0)
    guarded = limiter(run_over_time)
    guarded()

    self.assertEqual(guarded.exit_status, pynisher.TimeoutException)
def _evaluate(self, config, type="normal"):
    """Evaluate ``config`` and feed the outcome into the score model.

    ``type`` selects how ``self.eval_func`` is called: ``"normal"`` wraps it
    with pynisher using ``self.mem_in_mb``, ``"init"`` wraps it with a fixed
    5000 MB limit, anything else calls it unwrapped.  Returns the
    ``"validation_score"`` entry of the result (0 when the evaluation
    returned ``None``).  ``Timeout.Timeout`` is printed and re-raised.
    """
    self.check_time()
    self.id += 1
    began = time.time()

    if type in ("normal", "init"):
        memory_cap = self.mem_in_mb if type == "normal" else 5000
        eval_func = pynisher.enforce_limits(
            mem_in_mb=memory_cap,
            cpu_time_in_s=self.cpu_time_in_s)(self.eval_func)
    else:
        eval_func = self.eval_func

    try:
        res = eval_func(config, self.bestconfig, self.id)
        self.sucess_run += 1
    except Timeout.Timeout as e:
        print(e)
        res = None
        raise (e)

    if res is None:
        res = {"validation_score": 0, "info": None}
    res["running_time"] = time.time() - began

    self._update_expert(config, res["validation_score"])

    if type != "default":
        res["predict_performance"] = self.score_model.get_performance(
            np.nan_to_num(config.get_array()))

    # NOTE(review): nesting reconstructed from a whitespace-mangled source;
    # the model update is assumed to run for every evaluation type -- confirm.
    score_is_positive = res["validation_score"] > 0
    if score_is_positive:
        self.score_model.partial_fit(np.nan_to_num(config.get_array()),
                                     res["validation_score"],
                                     res["running_time"])
    else:
        self.score_model.partial_fit(np.nan_to_num(config.get_array()), 0, 3000)

    self.log_result(res, config)
    return res["validation_score"]
def run(self, config, instance=None, cutoff=None, memory_limit=None, seed=12345, instance_specific="0"):
    """Evaluate ``config`` with ``self.ta`` under pynisher limits.

    ``cutoff`` is used as the wall-time limit and ``memory_limit`` as the
    memory limit.  Returns ``(status, cost, runtime, additional_run_info)``.
    ``instance``, ``seed`` and ``instance_specific`` are not read here.
    """
    D = self.backend.load_datamanager()
    queue = multiprocessing.Queue()

    limit_kwargs = dict(logger=logging.getLogger("pynisher"),
                        wall_time_in_s=cutoff,
                        mem_in_mb=memory_limit)
    ta_kwargs = dict(queue=queue,
                     config=config,
                     data=D,
                     backend=self.backend,
                     seed=self.autosklearn_seed,
                     num_run=self.num_run,
                     with_predictions=self.with_predictions,
                     all_scoring_functions=self.all_scoring_functions,
                     output_y_test=self.output_y_test,
                     subsample=None,
                     **self.resampling_strategy_args)

    obj = pynisher.enforce_limits(**limit_kwargs)(self.ta)
    obj(**ta_kwargs)

    if obj.exit_status is pynisher.TimeoutException:
        status, cost = StatusType.TIMEOUT, WORST_POSSIBLE_RESULT
        additional_run_info = 'Timeout'
    elif obj.exit_status is pynisher.MemorylimitException:
        status, cost = StatusType.MEMOUT, WORST_POSSIBLE_RESULT
        additional_run_info = 'Memout'
    else:
        try:
            # Entry 1 of the queued tuple is read as the result and entry 3
            # as the error message.
            info = queue.get(block=True, timeout=2)
            result = info[1]
            error_message = info[3]

            finished_cleanly = obj.exit_status == 0 and result is not None
            if finished_cleanly:
                status = StatusType.SUCCESS
                cost = result
                additional_run_info = ''
            else:
                status = StatusType.CRASHED
                cost = WORST_POSSIBLE_RESULT
                additional_run_info = error_message
        except Exception as e0:
            additional_run_info = 'Unknown error (%s) %s' % (type(e0), e0)
            status = StatusType.CRASHED
            cost = WORST_POSSIBLE_RESULT

    runtime = float(obj.wall_clock_time)
    self.num_run += 1
    return status, cost, runtime, additional_run_info
def compute(self, config, budget, working_directory, config_id, **kwargs):
    """Evaluate ``config`` for ``budget`` and return the pipeline result.

    With ``self.guarantee_limits`` set, ``self.optimize_pipeline`` runs inside
    ``pynisher.enforce_limits`` (memory limit from the pipeline config; wall
    time of ``budget + 240`` seconds for time budgets, otherwise unlimited).

    Returns:
        The result dict.  On a memory-limit hit a placeholder
        ``{"loss": 100000, "info": {}}`` is returned instead.

    Raises:
        Exception: when the time limit was reached or the wrapped call ended
            with any other non-zero exit status.
    """
    self.autonet_logger.debug("Budget " + str(budget) + " config: " + str(config))

    start_time = time.time()
    self.autonet_logger.debug("Starting optimization!")

    # guarantee time and memory limits using pynisher
    if self.guarantee_limits:
        import pynisher

        time_limit = None
        if self.budget_type == BudgetTypeTime:
            # (the unused local ``grace_time`` was removed here)
            time_limit = int(budget + 240)

        # start optimization
        limit_train = pynisher.enforce_limits(
            mem_in_mb=self.pipeline_config['memory_limit_mb'],
            wall_time_in_s=time_limit)(self.optimize_pipeline)
        result = limit_train(config, config_id, budget, start_time)

        # check for exceptions
        if limit_train.exit_status == pynisher.TimeoutException:
            raise Exception("Time limit reached. Took " + str((time.time() - start_time))
                            + " seconds with budget " + str(budget))
        elif limit_train.exit_status == pynisher.MemorylimitException:
            result = {"loss": 100000, "info": {}}
            return result
        elif limit_train.exit_status != 0:
            self.autonet_logger.info('Exception occurred using config:\n' + str(config))
            raise Exception("Exception in train pipeline. Took " + str((time.time() - start_time))
                            + " seconds with budget " + str(budget))
    else:
        result = self.optimize_pipeline(config, config_id, budget, start_time)

    loss = result['loss']
    info = result['info']
    self.autonet_logger.debug("Result: " + str(loss) + " info: " + str(info))

    # that is not really elegant but we can want to achieve some kind of feedback
    network_name = [v for k, v in config.items() if k.endswith('network')] or "None"

    self.autonet_logger.info("Training " + str(network_name) + " with budget " + str(budget)
                             + " resulted in optimize-metric-loss: " + str(loss)
                             + " took " + str((time.time() - start_time)) + " seconds")
    return result
def test_crash_unexpectedly(self):
    """Call ``crash_unexpectedly`` with SIGQUIT and check status and exit code."""
    print("Testing an unexpected signal simulating a crash.")

    limiter = pynisher.enforce_limits(
        context=multiprocessing.get_context(context),
        logger=self.logger,
    )
    guarded = limiter(crash_unexpectedly)

    outcome = guarded(signal.SIGQUIT)
    self.assertIsNone(outcome)
    self.assertEqual(guarded.exit_status, pynisher.SignalException)
    self.assertEqual(guarded.exitcode, 0)
def evaluate(self, config):
    """Call ``self.eval_func(config)`` under pynisher limits and return its result.

    The configuration and the score are printed before returning.
    """
    limiter = pynisher.enforce_limits(
        mem_in_mb=self.mem_in_mb,
        cpu_time_in_s=self.cpu_time_in_s,
    )
    guarded_eval = limiter(self.eval_func)

    score = guarded_eval(config)
    print(config, "score: ", score, end="\n\n")
    return score
def test_big_return_data(self):
    """Check length and exit code for return values of increasing size."""
    print("Testing big return values")

    limiter = pynisher.enforce_limits(
        context=multiprocessing.get_context(context),
        logger=self.logger,
    )
    guarded = limiter(return_big_array)

    element_counts = (4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144)
    for num_elements in element_counts:
        returned = guarded(num_elements)
        self.assertEqual(len(returned), num_elements)
        self.assertEqual(guarded.exitcode, 0)
def test_num_processes(self):
    """Check that exceeding a limit of one process reports ``SubprocessException``."""
    print("Testing number of processes constraint.")

    limiter = pynisher.enforce_limits(
        mem_in_mb=None,
        wall_time_in_s=None,
        num_processes=1,
        grace_period_in_s=None,
    )
    guarded = limiter(simulate_work)

    for processes in (2, 15, 50, 100, 250):
        outcome = guarded(0, 0, processes)
        self.assertIsNone(outcome)
        self.assertEqual(guarded.exit_status, pynisher.SubprocessException)
def test_out_of_memory(self):
    """Check that ``simulate_work`` under a 32 MB limit reports ``MemorylimitException``."""
    print("Testing memory constraint.")

    limiter = pynisher.enforce_limits(
        mem_in_mb=32,
        wall_time_in_s=None,
        cpu_time_in_s=None,
        grace_period_in_s=None,
    )
    guarded = limiter(simulate_work)

    for mem in (1024, 2048, 4096):
        outcome = guarded(mem, 0, 0)
        self.assertIsNone(outcome)
        self.assertEqual(guarded.exit_status, pynisher.MemorylimitException)
def get_test_performance(self, X, y, categorical_features, X_test=None, y_test=None):
    """Delegate to ``self.searcher.test_performance`` with a limited test function.

    ``test_function`` is wrapped with pynisher using ``self.memory_limit`` and
    three times ``self.time_limit_for_evaluation`` as the CPU-time limit.
    """
    cpu_budget = self.time_limit_for_evaluation * 3
    limiter = pynisher.enforce_limits(mem_in_mb=self.memory_limit,
                                      cpu_time_in_s=cpu_budget)
    limited_test = limiter(test_function)

    print("Get test performance ...")
    return self.searcher.test_performance(
        X, y, X_test, y_test, limited_test, categorical_features)
def test_time_out(self):
    """Check that a 1 s wall-time limit yields ``TimeoutException``."""
    print("Testing wall clock time constraint.")

    limiter = pynisher.enforce_limits(
        mem_in_mb=None,
        wall_time_in_s=1,
        cpu_time_in_s=None,
        grace_period_in_s=None,
    )
    guarded = limiter(simulate_work)

    for mem in range(1, 10):
        outcome = guarded(mem, 10, 0)
        self.assertIsNone(outcome)
        self.assertEqual(guarded.exit_status, pynisher.TimeoutException)
def test_liblinear_svc(self):
    """Run ``svc_example`` under a CPU-time limit and inspect the aftermath.

    Checks the number of remaining child processes, the four debug messages
    recorded by the picklable mock logger, the call duration and the exit
    code.  Timing tolerance and accepted exit codes depend on the
    multiprocessing context and the interpreter version.
    """
    time_limit = 2
    grace_period = 1
    this_logger = PickableMock()

    limiter = pynisher.enforce_limits(
        cpu_time_in_s=time_limit,
        mem_in_mb=None,
        context=multiprocessing.get_context(context),
        grace_period_in_s=grace_period,
        logger=this_logger,
    )
    guarded = limiter(svc_example)

    began = time.time()
    guarded(16384, 10000)
    duration = time.time() - began

    time.sleep(1)
    current_process = psutil.Process()
    self.assertEqual(len(current_process.children(recursive=True)),
                     expected_children[context])

    # Using a picklable-logger to capture all messages
    self.assertEqual(this_logger.debug.call_count, 4)
    expected_messages = (
        'Restricting your function to 2 seconds cpu time.',
        'Allowing a grace period of 1 seconds.',
        'Function called with argument: (16384, 10000), {}',
        'Your function call closed the pipe prematurely -> '
        'Subprocess probably got an uncatchable signal.',
    )
    for position, expected in enumerate(expected_messages):
        self.assertEqual(
            this_logger.debug.call_args_list[position][0][0], expected)

    # self.assertEqual(wrapped_function.exit_status, pynisher.CpuTimeoutException)

    # The tolerance in this context is how much overhead time we accepted in
    # pynisher. Depending on the context, we might require up to 0.5 seconds
    # tolerance as seen in github actions
    tolerance = 0.1 if context == 'fork' else 0.5
    legacy_python = sys.version_info < (3, 7)
    if legacy_python:
        # In python 3.6, in github actions we see higher times around 0.2 more
        # than expected. This happens in all 3 contexts.
        tolerance += 0.2

    self.assertGreater(duration, time_limit - tolerance)
    self.assertLess(duration, time_limit + grace_period + tolerance)

    if legacy_python:
        # A 255 exit code is seen in forkserver/spawn in 3.6 exclusively
        self.assertIn(guarded.exitcode, (-9, 255))
    else:
        self.assertEqual(guarded.exitcode, -9)
def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchComponent:
    """
    Fit the component from a dictionary of pre-requisites.

    When ``X['use_pynisher']`` is truthy, ``self._fit`` is wrapped with
    pynisher limits taken from the optional ``'runtime'`` and
    ``'cpu_memory_limit'`` entries of ``X``.

    Args:
        X (Dict[str, Any]): Dependencies needed by the current component
            to perform fit
        y (Any): not used. To comply with sklearn API

    Returns:
        The object stored in ``self.choice``
    """
    # Make sure that the prerequisites are there
    self.check_requirements(X, y)

    # Setup the logger.
    # Log to a user provided port else to the default logging port
    logging_port = X.get('logger_port', logging.handlers.DEFAULT_TCP_LOGGING_PORT)
    self.logger = get_named_client_logger(
        output_dir=X['backend'].temporary_directory,
        name=X['job_id'],
        port=logging_port,
    )

    fit_function = self._fit
    if X['use_pynisher']:
        limiter = pynisher.enforce_limits(
            wall_time_in_s=X.get('runtime'),
            mem_in_mb=X.get('cpu_memory_limit'),
            logger=self.logger,
        )
        fit_function = limiter(self._fit)

    # Call the actual fit function.
    state_dict = fit_function(X=X, y=y, **kwargs)

    if X['use_pynisher']:
        # Normally the X[network] is a pointer to the object, so at the
        # end, when we train using X, the pipeline network is updated for free
        # If we do multiprocessing (because of pynisher) we have to update
        # X[network] manually. we do so in a way that every pipeline component
        # can see this new network -- via an update, not overwrite of the pointer
        state_dict = state_dict.result
        X['network'].load_state_dict(state_dict)

    # TODO: when have the optimizer code, the pynisher object might have failed
    # We should process this function as Failure if so through
    # fit_function.exit_status
    return cast(autoPyTorchComponent, self.choice)
def _calculate_metafeatures_with_limits(self, time_limit):
    """Run ``self._calculate_metafeatures`` under pynisher limits.

    ``time_limit`` is raised to at least 1 and truncated to an int for the
    wall-time limit.  Returns the result of the wrapped call, or ``None`` when
    an exception was raised (the error is logged).
    """
    time_limit = max(time_limit, 1)

    try:
        limiter = pynisher.enforce_limits(
            mem_in_mb=self.memory_limit,
            wall_time_in_s=int(time_limit),
            grace_period_in_s=30,
        )
        guarded = limiter(self._calculate_metafeatures)
        return guarded()
    except Exception as e:
        self.logger.error('Error getting metafeatures: %s', str(e))

    return None
def compute(self, config, budget, working_directory, config_id, **kwargs): config.update(self.constant_hyperparameter) if self.guarantee_limits and self.budget_type == 'time': import pynisher limit_train = pynisher.enforce_limits(mem_in_mb=self.pipeline_config['memory_limit_mb'], wall_time_in_s=int(budget * 4))(self.optimize_pipeline) : else: result, randomstate = self.optimize_pipeline(config, budget, config_id, random.getstate())
def test_success(self):
    """Check that calls without any limit run through and return their arguments."""
    print("Testing unbounded function call which have to run through!")

    limiter = pynisher.enforce_limits(
        mem_in_mb=None,
        wall_time_in_s=None,
        cpu_time_in_s=None,
        grace_period_in_s=None,
    )
    guarded = limiter(simulate_work)

    for mem in (1, 2, 4, 8, 16):
        expected = (mem, 0, 0)
        self.assertEqual(expected, guarded(mem, 0, 0))
        self.assertEqual(guarded.exit_status, 0)
def test_busy_in_C_library(self):
    """Check that ``svm_example`` under a 2 s wall-time limit returns in time.

    After the call no child processes may remain and the measured duration
    must be below 2.1 seconds.
    """
    wrapped_function = pynisher.enforce_limits(wall_time_in_s=2)(svm_example)

    start = time.time()
    wrapped_function(16384, 128)
    duration = time.time() - start

    time.sleep(1)
    p = psutil.Process()
    self.assertEqual(len(p.children(recursive=True)), 0)
    # assertLess reports the measured duration when the check fails.
    self.assertLess(duration, 2.1)
def test_liblinear_svc(self):
    """Run ``svc_example`` under a 2 s CPU-time limit with a 1 s grace period.

    After the call no child processes may remain and the duration must lie
    between the limit and limit + grace period (0.1 s slack on each side).
    """
    time_limit = 2
    grace_period = 1

    wrapped_function = pynisher.enforce_limits(
        cpu_time_in_s=time_limit,
        mem_in_mb=None,
        grace_period_in_s=grace_period,
        logger=logger,
    )(svc_example)

    start = time.time()
    wrapped_function(16384, 1000)
    duration = time.time() - start

    time.sleep(1)
    p = psutil.Process()
    self.assertEqual(len(p.children(recursive=True)), 0)

    # fails with pynisher.AnythingException for some reason
    #self.assertTrue(wrapped_function.exit_status == pynisher.CpuTimeoutException)

    # Comparison asserts show both values on failure.
    self.assertGreater(duration, time_limit - 0.1)
    self.assertLess(duration, time_limit + grace_period + 0.1)
def evaluate_function(function, config_dict, deterministic=False, has_instances=False):
    """Evaluate ``function`` on one configuration under a wall-time limit.

    Args:
        function: callable to evaluate; called with the remaining entries of
            ``config_dict`` as keyword arguments.
        config_dict: configuration dictionary.  It is modified in place:
            ``'cutoff_time'`` is popped, ``'instance'`` is deleted unless
            ``has_instances`` and ``'seed'`` is deleted when ``deterministic``.
        deterministic: when true, no ``seed`` keyword is passed on.
        has_instances: when false, no ``instance`` keyword is passed on.

    Returns:
        dict with the keys ``'value'``, ``'status'`` and ``'runtime'``.  A dict
        returned by ``function`` is merged into it; any other non-None return
        value is stored under ``'value'``.
    """
    current_t_limit = int(math.ceil(config_dict.pop('cutoff_time')))
    wrapped_function = pynisher.enforce_limits(
        wall_time_in_s=current_t_limit, grace_period_in_s=1)(function)

    # delete the unused variables from the dict
    if not has_instances:
        del config_dict['instance']
    if deterministic:
        del config_dict['seed']

    # RUSAGE_CHILDREN accumulates over every child that has been waited for,
    # so take a baseline and report only what this call added.  Without the
    # baseline the runtime grows with every evaluation and later runs are
    # wrongly flagged as TIMEOUT.
    cpu_before = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime
    start = time.time()
    res = wrapped_function(**config_dict)
    wall_time = time.time() - start
    cpu_time = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime - cpu_before

    # try to infer the status of the function call:
    # if res['status'] exists, it will be used in 'report_result'
    # if there was no return value, it has either crashed or timed out
    # for simple function, we just use 'SAT'
    result_dict = {
        'value': (2**31) - 1,
        'status': 'CRASHED' if res is None else 'SAT',
        'runtime': cpu_time,
    }

    if res is not None:
        if isinstance(res, dict):
            result_dict.update(res)
        else:
            result_dict['value'] = res

    # account for timeouts (current_t_limit is always an int here)
    cpu_exhausted = result_dict['runtime'] > current_t_limit - 2e-2
    wall_exhausted = wall_time >= 10 * current_t_limit
    if cpu_exhausted or wall_exhausted:
        result_dict['status'] = 'TIMEOUT'

    return result_dict
def run(self):
    """Run ``self.main`` under pynisher limits, retrying after memory errors.

    A run that ends with ``pynisher.MemorylimitException`` halves
    ``self.ensemble_nbest`` and starts ``self.main`` again; the loop stops on
    any other exit status or when ``self.ensemble_nbest`` is already 1.
    """
    buffer_time = 5  # TODO: Buffer time should also be used in main!?

    retry = True
    while retry:
        retry = False
        time_left = self.time_limit - buffer_time
        guarded_main = pynisher.enforce_limits(
            wall_time_in_s=int(time_left),
            mem_in_mb=self.memory_limit,
            logger=self.logger,
        )(self.main)
        guarded_main()

        if guarded_main.exit_status is pynisher.MemorylimitException:
            # if ensemble script died because of memory error,
            # reduce nbest to reduce memory consumption and try it again
            if self.ensemble_nbest == 1:
                self.logger.critical("Memory Exception -- Unable to escape from memory exception")
            else:
                self.ensemble_nbest = int(self.ensemble_nbest/2)
                self.logger.warning("Memory Exception -- restart with less ensemle_nbest: %d" %(self.ensemble_nbest ))
                # ATTENTION: main will start from scratch;
                # all data structures are empty again
                retry = True
def run(self, config, instance=None, cutoff=None, seed=12345, instance_specific=None):
    """Evaluate ``config`` with ``self.ta`` under pynisher limits.

    ``self.ta`` is wrapped with ``pynisher.enforce_limits`` (wall time
    ``cutoff``, memory ``self.memory_limit``) and called with a result queue.
    The pynisher exit status and the queue contents are then translated into
    a status and a cost.

    Args:
        config: configuration to evaluate; an ``int`` is recorded with
            origin ``'DUMMY'``, otherwise its ``origin`` attribute is used
            (``'UNKNOWN'`` if missing).
        instance: forwarded to the target function and stored in its
            ``init_params``.
        cutoff: wall-time limit in seconds handed to pynisher.
        seed: not read in this method (``self.autosklearn_seed`` is forwarded
            instead).
        instance_specific: must be ``None`` or ``'0'``.

    Returns:
        Tuple ``(status, cost, runtime, additional_run_info)``.

    Raises:
        ValueError: if ``instance_specific`` is neither ``None`` nor ``'0'``.
    """
    queue = multiprocessing.Queue()

    if not (instance_specific is None or instance_specific == '0'):
        raise ValueError(instance_specific)
    init_params = {'instance': instance}
    if self.init_params is not None:
        init_params.update(self.init_params)

    # Keyword arguments for pynisher.enforce_limits.
    arguments = dict(
        logger=logging.getLogger("pynisher"),
        wall_time_in_s=cutoff,
        mem_in_mb=self.memory_limit,
    )
    # Keyword arguments for the target function itself.
    obj_kwargs = dict(
        queue=queue,
        config=config,
        backend=self.backend,
        metric=self.metric,
        seed=self.autosklearn_seed,
        num_run=self.num_run,
        all_scoring_functions=self.all_scoring_functions,
        output_y_hat_optimization=self.output_y_hat_optimization,
        include=self.include,
        exclude=self.exclude,
        disable_file_output=self.disable_file_output,
        instance=instance,
        init_params=init_params,
    )

    if self.resampling_strategy != 'test':
        obj_kwargs['resampling_strategy'] = self.resampling_strategy
        obj_kwargs['resampling_strategy_args'] = self.resampling_strategy_args

    obj = pynisher.enforce_limits(**arguments)(self.ta)
    obj(**obj_kwargs)

    if obj.exit_status in (pynisher.TimeoutException, pynisher.MemorylimitException):
        # Even if the pynisher thinks that a timeout or memout occurred,
        # it can be that the target algorithm wrote something into the queue
        # - then we treat it as a successful run
        try:
            info = autosklearn.evaluation.util.read_queue(queue)
            # Only the last queue entry determines loss and status.
            result = info[-1]['loss']
            status = info[-1]['status']
            additional_run_info = info[-1]['additional_run_info']

            if obj.exit_status is pynisher.TimeoutException:
                additional_run_info['info'] = 'Run stopped because of timeout.'
            elif obj.exit_status is pynisher.MemorylimitException:
                additional_run_info['info'] = 'Run stopped because of memout.'

            if status == StatusType.SUCCESS:
                cost = result
            else:
                cost = WORST_POSSIBLE_RESULT

        except Empty:
            # Nothing was written to the queue: report a plain timeout/memout.
            info = None
            if obj.exit_status is pynisher.TimeoutException:
                status = StatusType.TIMEOUT
                additional_run_info = {'error': 'Timeout'}
            elif obj.exit_status is pynisher.MemorylimitException:
                status = StatusType.MEMOUT
                additional_run_info = {
                    'error': 'Memout (used more than %d MB).' % self.memory_limit
                }
            else:
                raise ValueError(obj.exit_status)
            cost = WORST_POSSIBLE_RESULT

    elif obj.exit_status is TAEAbortException:
        info = None
        status = StatusType.ABORT
        cost = WORST_POSSIBLE_RESULT
        additional_run_info = {'error': 'Your configuration of '
                                        'auto-sklearn does not work!'}

    else:
        try:
            info = autosklearn.evaluation.util.read_queue(queue)
            result = info[-1]['loss']
            status = info[-1]['status']
            additional_run_info = info[-1]['additional_run_info']

            if obj.exit_status == 0:
                cost = result
            else:
                # Any other exit status overrides the status from the queue.
                status = StatusType.CRASHED
                cost = WORST_POSSIBLE_RESULT
                additional_run_info['info'] = 'Run treated as crashed ' \
                                              'because the pynisher exit ' \
                                              'status %s is unknown.' % \
                                              str(obj.exit_status)
        except Empty:
            info = None
            additional_run_info = {'error': 'Result queue is empty'}
            status = StatusType.CRASHED
            cost = WORST_POSSIBLE_RESULT

    if not isinstance(additional_run_info, dict):
        additional_run_info = {'message': additional_run_info}

    # Attach learning curves for iterative fits that did not crash; a curve
    # is only stored when it has more than one entry.
    if info is not None and self.resampling_strategy == \
            'holdout-iterative-fit' and status != StatusType.CRASHED:
        learning_curve = util.extract_learning_curve(info)
        learning_curve_runtime = util.extract_learning_curve(
            info, 'duration'
        )
        if len(learning_curve) > 1:
            additional_run_info['learning_curve'] = learning_curve
            additional_run_info['learning_curve_runtime'] = learning_curve_runtime

        train_learning_curve = util.extract_learning_curve(
            info, 'train_loss'
        )
        if len(train_learning_curve) > 1:
            additional_run_info['train_learning_curve'] = train_learning_curve
            additional_run_info['learning_curve_runtime'] = learning_curve_runtime

        if self._get_validation_loss:
            validation_learning_curve = util.extract_learning_curve(
                info, 'validation_loss',
            )
            if len(validation_learning_curve) > 1:
                additional_run_info['validation_learning_curve'] = \
                    validation_learning_curve
                additional_run_info[
                    'learning_curve_runtime'] = learning_curve_runtime

        if self._get_test_loss:
            test_learning_curve = util.extract_learning_curve(
                info, 'test_loss',
            )
            if len(test_learning_curve) > 1:
                additional_run_info['test_learning_curve'] = test_learning_curve
                additional_run_info[
                    'learning_curve_runtime'] = learning_curve_runtime

    if isinstance(config, int):
        origin = 'DUMMY'
    else:
        origin = getattr(config, 'origin', 'UNKNOWN')
    additional_run_info['configuration_origin'] = origin

    runtime = float(obj.wall_clock_time)
    self.num_run += 1

    autosklearn.evaluation.util.empty_queue(queue)

    return status, cost, runtime, additional_run_info
def run(self):
    """Call ``self.main`` with a wall-time limit of ``self.limit`` minus 5 seconds."""
    buffer_time = 5
    wall_time_budget = int(self.limit - buffer_time)

    limiter = pynisher.enforce_limits(wall_time_in_s=wall_time_budget)
    guarded_main = limiter(self.main)
    guarded_main()
def run(self):
    """Execute ``self.run_smbo`` with memory and wall-time limits applied."""
    # we use pynisher here to enforce limits
    wall_time_budget = int(self.total_walltime_limit)
    guarded_smbo = pynisher.enforce_limits(
        mem_in_mb=self.memory_limit,
        wall_time_in_s=wall_time_budget,
        grace_period_in_s=5,
    )(self.run_smbo)

    guarded_smbo(max_iters=self.smac_iters)
def remote_smac_function(only_arg):
    """
    The function that every worker from the multiprocessing pool calls
    to perform a separate SMAC run.

    This function is not part of the API that users should access, but
    rather part of the internals of pysmac. Due to the limitations of the
    multiprocessing module, it can only take one argument which is a list
    containing important arguments in a very specific order.

    :param only_arg: sequence of exactly twelve items, in this order:
        ``scenario_file, additional_options_fn, seed, function,
        parser_dict, memory_limit_smac_mb, class_path, num_instances,
        mem_limit_function, t_limit_function, deterministic,
        java_executable``.
    :returns: None. Results are handed back to SMAC through
        ``smac.report_result``; any exception is printed via
        ``traceback.print_exc`` instead of being propagated.
    """
    try:
        (scenario_file, additional_options_fn, seed, function, parser_dict,
         memory_limit_smac_mb, class_path, num_instances, mem_limit_function,
         t_limit_function, deterministic, java_executable) = only_arg

        logger = multiprocessing.get_logger()

        smac = remote_smac(scenario_file, additional_options_fn, seed,
                           class_path, memory_limit_smac_mb, parser_dict,
                           java_executable)
        logger.debug('Started SMAC subprocess')

        num_iterations = 0

        while True:
            config_dict = smac.next_configuration()

            # method next_configuration checks whether smac is still alive
            # if it is None, it means that SMAC has finished (for whatever reason)
            if config_dict is None:
                break

            # delete the unused variables from the dict; whatever remains is
            # passed to the user function as keyword arguments
            if num_instances is None:
                del config_dict['instance']

            del config_dict['instance_info']
            del config_dict['cutoff_length']
            if deterministic:
                del config_dict['seed']

            current_t_limit = int(ceil(config_dict.pop('cutoff_time')))
            # only restrict the runtime if an initial cutoff was defined
            current_t_limit = None if t_limit_function is None else current_t_limit
            current_wall_time_limit = None if current_t_limit is None else 10*current_t_limit

            # execute the function and measure the time it takes to evaluate
            wrapped_function = pynisher.enforce_limits(
                mem_in_mb=mem_limit_function,
                cpu_time_in_s=current_t_limit,
                wall_time_in_s=current_wall_time_limit,
                grace_period_in_s=1)(function)

            # workaround for the 'Resource temporarily not available' error on
            # the BaWue cluster if too many processes were spawned in a short
            # period. It now waits a second and tries again for 8 times.
            max_tries = 8
            res = None
            # Defaults used when no attempt completes, so the reporting code
            # below never sees unbound or stale measurements.
            wall_time = 0.0
            cpu_time = 0.0
            call_succeeded = False

            for num_try in range(1, max_tries + 1):
                try:
                    # RUSAGE_CHILDREN is cumulative over all reaped children,
                    # so take the difference around this single call.
                    cpu_before = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime
                    start = time.time()
                    res = wrapped_function(**config_dict)
                    wall_time = time.time()-start
                    cpu_time = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime - cpu_before
                    call_succeeded = True
                    break
                except OSError as e:
                    # errno 11 is 'Resource temporarily unavailable'; anything
                    # else is a real error and must propagate.
                    if e.errno != 11:
                        raise
                    logger.warning('Resource temporarily not available. Trail {} of 8'.format(num_try))
                    time.sleep(1)

            # An explicit flag (rather than the attempt counter) decides
            # whether we gave up, so a success on the last attempt is kept.
            if not call_succeeded:
                logger.warning('Configuration {} crashed 8 times, giving up on it.'.format(config_dict))
                res = None

            if res is not None:
                try:
                    logger.debug('iteration %i:function value %s, computed in %s seconds'%(num_iterations, str(res), str(res['runtime'])))
                except (TypeError, AttributeError, KeyError, IndexError):
                    logger.debug('iteration %i:function value %s, computed in %s seconds'%(num_iterations, str(res),cpu_time))
            else:
                logger.debug('iteration %i: did not return in time, so it probably timed out'%(num_iterations))

            # try to infer the status of the function call:
            # if res['status'] exists, it will be used in 'report_result'
            # if there was no return value, it has either crashed or timed out
            # for simple function, we just use 'SAT'
            status = b'CRASHED' if res is None else b'SAT'

            # A timeout can only be inferred when a time limit was imposed.
            if current_t_limit is not None:
                try:
                    # check if it recorded some runtime by itself and use that
                    if res['runtime'] > current_t_limit - 2e-2:  # mini slack to account for limited precision of cputime measurement
                        status = b'TIMEOUT'
                except (AttributeError, TypeError, KeyError, IndexError):
                    # if not, we have to use our own time measurements here
                    if (res is None) and ((cpu_time > current_t_limit - 2e-2) or (wall_time >= 10*current_t_limit)):
                        status = b'TIMEOUT'

            smac.report_result(res, cpu_time, status)
            num_iterations += 1
    except:
        # deliberately broad: this runs in a pool worker, so print whatever
        # went wrong to make the traceback of the subprocess visible
        traceback.print_exc()
import sys # using it as a decorator for every call to this function @pynisher.enforce_limits(wall_time_in_s=2) def my_function (t): time.sleep(t) return(t) for t in range(5): print(my_function(t)) def my_other_function(t): print('foo') time.sleep(t) print('bar') return(t) # explicitly create a new function without wrapping the original everytime my_wrapped_function = pynisher.enforce_limits(wall_time_in_s=3, capture_output=True)(my_other_function) for t in range(5): print(my_wrapped_function(t)) print(vars(my_wrapped_function)) import IPython IPython.embed()
def eval_with_limits(datamanager, tmp_dir, config, seed, num_run,
                     resampling_strategy, resampling_strategy_args,
                     memory_limit, func_eval_time_limit, subsample=None,
                     with_predictions=True, all_scoring_functions=False,
                     output_y_test=True):
    """Evaluate a configuration under pynisher memory and time limits.

    The evaluation function is picked from ``resampling_strategy`` and run
    through ``pynisher.enforce_limits``; it reports back via a
    ``multiprocessing.Queue``.

    Returns whatever item is read from the queue. If nothing can be read,
    returns the tuple ``(duration, 2.0, seed, str(error), status)`` where
    ``status`` is ``StatusType.MEMOUT`` for a ``MemoryError``,
    ``StatusType.TIMEOUT`` when the elapsed time reached
    ``func_eval_time_limit`` and ``StatusType.CRASHED`` otherwise.

    Raises ``ValueError`` for an unknown ``resampling_strategy``.
    """
    if resampling_strategy_args is None:
        resampling_strategy_args = {}

    if resampling_strategy == 'holdout':
        eval_function = eval_holdout
    elif resampling_strategy == 'holdout-iterative-fit':
        eval_function = eval_iterative_holdout
    elif resampling_strategy == 'cv':
        eval_function = eval_cv
    elif resampling_strategy == 'partial-cv':
        eval_function = eval_partial_cv
    elif resampling_strategy == 'test':
        eval_function = eval_t
        # the 'test' strategy switches both of these off
        output_y_test = False
        with_predictions = False
    else:
        raise ValueError('Unknown resampling strategy %s' %
                         resampling_strategy)

    start_time = time.time()
    queue = multiprocessing.Queue()
    limiter = pynisher.enforce_limits(
        mem_in_mb=memory_limit,
        wall_time_in_s=func_eval_time_limit,
        cpu_time_in_s=func_eval_time_limit,
        grace_period_in_s=30,
    )
    safe_eval = limiter(eval_function)

    try:
        safe_eval(queue=queue, config=config, data=datamanager,
                  tmp_dir=tmp_dir, seed=seed, num_run=num_run,
                  subsample=subsample,
                  with_predictions=with_predictions,
                  all_scoring_functions=all_scoring_functions,
                  output_y_test=output_y_test,
                  **resampling_strategy_args)
        info = queue.get(block=True, timeout=1)
    except Exception as e0:
        is_memory_error = isinstance(e0, MemoryError)

        try:
            # This happens if a timeout is reached and a half-way trained
            # model can be used to predict something
            info = queue.get_nowait()
        except Exception:
            # This happens if a timeout is reached and the model does not
            # support iterative_fit()
            duration = time.time() - start_time
            if is_memory_error:
                status = StatusType.MEMOUT
            elif duration >= func_eval_time_limit:
                status = StatusType.TIMEOUT
            else:
                status = StatusType.CRASHED
            info = (duration, 2.0, seed, str(e0), status)

    return info
def test_crash_unexpectedly(self):
    """Check the wrapper's result and exit status for ``crash_unexpectedly``.

    The wrapped call, given ``signal.SIGQUIT``, must return ``None`` and the
    wrapper's ``exit_status`` must be ``pynisher.AnythingException``.
    """
    print("Testing an unexpected signal simulating a crash.")
    limited = pynisher.enforce_limits()(crash_unexpectedly)
    outcome = limited(signal.SIGQUIT)
    self.assertIsNone(outcome)
    self.assertEqual(limited.exit_status, pynisher.AnythingException)