running_on_codalab)) p.name = 'Manager' p.start() # Monitor the process, checking to see if it is complete or if total memory usage too high while True: time.sleep(0.2) if p.is_alive(): available_mem = psutil.virtual_memory().available # measured in bytes print('Available memory = %fMB' % (available_mem / (1024 * 1024))) if available_mem < constants.OVERHEAD / 2: print('Less than %.1f GB memory available - aborting process' % (constants.OVERHEAD / float(2 ** 30))) psutil.Process(pid=p.pid).send_signal(sig=signal.SIGTERM) # tidy up then die please p.join(timeout=10) # give it a while to respond to signal if p.is_alive(): util.murder_family(p.pid, killall=True, sig=signal.SIGKILL) p.join() print('Process %d terminated' % p.pid) break if (time.time() - start_time) > (time_budget - 15): print('Time limit approaching - terminating') psutil.Process(pid=p.pid).send_signal(sig=signal.SIGTERM) p.join(timeout=10) if p.is_alive(): util.murder_family(p.pid, killall=True, sig=signal.SIGKILL) p.join() print('Process %d terminated' % p.pid) break else: print('Remaining time budget = %s' % (time_budget - (time.time() - start_time))) root_logger.info('Remaining time = %ds, dataset %s',
time.sleep(0.2) if p.is_alive(): available_mem = psutil.virtual_memory( ).available # measured in bytes print('Available memory = %fMB' % (available_mem / (1024 * 1024))) if available_mem < constants.OVERHEAD / 2: print( 'Less than %.1f GB memory available - aborting process' % (constants.OVERHEAD / float(2**30))) psutil.Process(pid=p.pid).send_signal( sig=signal.SIGTERM) # tidy up then die please p.join(timeout=10) # give it a while to respond to signal if p.is_alive(): util.murder_family(p.pid, killall=True, sig=signal.SIGKILL) p.join() print('Process %d terminated' % p.pid) break if (time.time() - start_time) > (time_budget - 15): print('Time limit approaching - terminating') psutil.Process(pid=p.pid).send_signal(sig=signal.SIGTERM) p.join(timeout=10) if p.is_alive(): util.murder_family(p.pid, killall=True, sig=signal.SIGKILL) p.join() print('Process %d terminated' % p.pid) break
def run_experiment_file(filename, plot_override=True, separate_process=False):
    """
    Initiate the series of experiments described by an experiment file.

    The experiment details are loaded from ``filename`` and printed, CPU
    affinity and logging are configured, the output (and optional score)
    directories are prepared, and a ``FixedLearnersStackingManager`` is
    created and run.

    :param filename: passed to ``load_experiment_details`` to obtain the
        experiment dictionary.
    :param plot_override: when not ``None``, replaces ``exp["plot"]``. Note
        that the default is ``True``, so plotting is switched on unless the
        caller passes ``None`` (keep the file's setting) or ``False``.
    :param separate_process: when true, the manager is run in a child process
        via ``agent.start_communication`` while this process watches stdin;
        any input on stdin terminates the child (SIGTERM, then SIGKILL if it
        is still alive after 5 seconds). When false, ``mgr.communicate()`` is
        called directly in this process.
    """
    exp = load_experiment_details(filename=filename)
    print("BEGIN EXPERIMENT SPECIFICATIONS")
    print(exp_params_to_str(exp))
    print("END EXPERIMENT SPECIFICATIONS")

    # Set number of processors: pin this process to every CPU except the last.
    # cpu_count() may return None when it cannot be determined, in which case
    # the affinity is left untouched; max(1, ...) keeps the list non-empty on
    # a single-CPU machine.
    this_process = psutil.Process()
    n_cpus = psutil.cpu_count()
    if n_cpus is not None:
        this_process.cpu_affinity(list(range(max(1, n_cpus - 1))))

    # Set up logging
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    form = logging.Formatter("[%(levelname)s/%(processName)s] %(asctime)s %(message)s")

    # Handler for logging to the console (stdout); warnings and above only
    sh = logging.StreamHandler(stream=sys.stdout)
    sh.setLevel(logging.WARNING)  # set level here
    # sh.addFilter(ProcessFilter())  # filter to show only logs from manager
    sh.setFormatter(form)
    root_logger.addHandler(sh)

    # Handler for logging to file (everything from DEBUG up, capped at 512 MiB)
    util.move_make_file(constants.LOGFILE)
    fh = logging.handlers.RotatingFileHandler(constants.LOGFILE, maxBytes=512 * 1024 * 1024)
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(form)
    root_logger.addHandler(fh)

    # Make output dir
    util.move_make(exp["output_dir"])

    # Make score dir and start the learning curve file with its CSV header
    if exp["score_dir"] is not None:
        util.move_make(exp["score_dir"])
        with open(os.path.join(exp["score_dir"], "learning_curve.csv"), "w") as score_file:
            score_file.write("Time,Score\n")

    # Record start time (creates an empty "<basename>.firstpost" marker file)
    open(os.path.join(exp["output_dir"], exp["basename"] + ".firstpost"), "wb").close()

    # Plotting?
    if plot_override is not None:
        exp["plot"] = plot_override

    # Start manager
    mgr = FixedLearnersStackingManager(
        exp["input_dir"],
        exp["output_dir"],
        exp["basename"],
        exp["time_budget"],
        compute_quantum=exp["compute_quantum"],
        plot=exp["plot"],
        overhead_memory=constants.OVERHEAD,
        cgroup_soft_limit=constants.CGROUP_SOFT_LIMIT,
        cgroup_hard_limit=constants.CGROUP_HARD_LIMIT,
        exp=exp,
    )

    if not separate_process:
        mgr.communicate()
        return

    # Create process
    child = Process(target=agent.start_communication, kwargs=dict(agent=mgr))
    child.name = "manager"
    child.start()

    print("\nPress enter to terminate at any time.\n")

    while child.is_alive():
        # Wait for one second to see if any keyboard input
        readable, _, _ = select.select([sys.stdin], [], [], 1)
        if not readable:
            continue

        print("\n\nTerminating")
        try:
            psutil.Process(pid=child.pid).send_signal(signal.SIGTERM)
            child.join(timeout=5)  # give it a while to respond to signal
            if child.is_alive():
                print("Didn't respond to SIGTERM")
                util.murder_family(pid=child.pid, killall=True, sig=signal.SIGKILL)
                # Reap the killed child so it does not linger as a zombie
                child.join()
        except psutil.NoSuchProcess:
            pass  # already dead
        break