def save_replay_buffer(self, dir_path=None):
    """
    Dump all complete episodes in the agent's memory to a bz2-compressed joblib file.

    :param dir_path: directory to save to; defaults to <experiment_path>/replay_buffer
    """
    if dir_path is None:
        dir_path = os.path.join(
            self.parent_level_manager.parent_graph_manager.task_parameters.experiment_path,
            'replay_buffer')
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    path = os.path.join(dir_path, 'RB_{}.joblib.bz2'.format(type(self).__name__))
    joblib.dump(self.memory.get_all_complete_episodes(), path, compress=('bz2', 1))
    screen.log('Saved replay buffer to: "{}" - Number of transitions: {}'.format(
        path, self.memory.num_transitions()))
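
# Loading the buffer back is symmetric via joblib, which decompresses the bz2
# file transparently. A minimal sketch, assuming the same naming convention as
# save_replay_buffer above ('Agent' stands in for the actual class name):
#
#   episodes = joblib.load(os.path.join(dir_path, 'RB_Agent.joblib.bz2'))
#   # 'episodes' is the list returned by memory.get_all_complete_episodes()
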
def _sanitize_device_list(devices: List[mx.Context]) -> List[mx.Context]:
    """
    Returns the intersection of the requested devices with the devices that are
    actually available. If the intersection is empty, falls back to mx.cpu().

    :param devices: list of requested devices
    :return: list of devices that are actually available
    """
    available_devices = [mx.cpu()] + [mx.gpu(i) for i in mx.test_utils.list_gpus()]
    intersection = [dev for dev in devices if dev in available_devices]
    if len(intersection) == 0:
        intersection = [mx.cpu()]
        screen.log('Requested devices {} not available. Defaulting to CPU context.'.format(devices))
    elif len(intersection) < len(devices):
        screen.log('{} not available, using {}.'.format(
            [dev for dev in devices if dev not in intersection], intersection))
    return intersection
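
# Usage sketch (hypothetical outputs, assuming a CPU-only machine where
# mx.test_utils.list_gpus() returns []):
#
#   _sanitize_device_list([mx.gpu(0)])            # -> [mx.cpu()], logs a fallback warning
#   _sanitize_device_list([mx.cpu(), mx.gpu(0)])  # -> [mx.cpu()], logs that gpu(0) was dropped
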
def init_environment_dependent_modules(self):
    """
    Hook into the environment and print the key bindings for human control mode.
    """
    super().init_environment_dependent_modules()
    self.env = self.parent_level_manager._real_environment
    screen.log_title("Human Control Mode")
    available_keys = self.env.get_available_keys()
    if available_keys:
        screen.log("Use keyboard keys to move. Press escape to quit. Available keys:")
        screen.log("")
        for action, key in available_keys:
            screen.log("\t- {}: {}".format(action, key))
    screen.separator()
def perform_reward_based_tests(args, preset_validation_params, preset_name):
    """
    Run a preset as a subprocess and poll its results csv until the rolling average
    of the evaluation reward crosses the preset's minimum threshold, the episode
    budget is exhausted, or the time limit is hit.

    :return: True if the reward threshold was reached in time
    """
    win_size = 10
    test_name = '__test_reward'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate process
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} '
           '-e {test_name} '
           '-n {num_workers} '
           '--seed 0 '
           '-c '
           '{level} '
           '&> {log_file_name} ').format(
               preset_name=preset_name,
               test_name=test_name,
               num_workers=preset_validation_params.num_workers,
               log_file_name=log_file_name,
               level='-lvl ' + preset_validation_params.reward_test_level
                     if preset_validation_params.reward_test_level else '')

    p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]
        last_num_episodes = 0

        if not args.no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes,
                           preset_validation_params, start_time, args)

        while csv is None or (csv['Episode #'].values[-1]
                              < preset_validation_params.max_episodes_to_achieve_reward
                              and time.time() - start_time < args.time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except Exception:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                averaged_rewards = np.convolve(
                    rewards, np.ones(min(len(rewards), win_size)) / win_size, mode='valid')
            else:
                time.sleep(1)
                continue

            if not args.no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes,
                               preset_validation_params, start_time, args)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if the reward is high enough
            if np.any(averaged_rewards >= preset_validation_params.min_reward_threshold):
                test_passed = True
                break

            time.sleep(1)

    # kill the test and print the result
    os.killpg(os.getpgid(p.pid), signal.SIGTERM)
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > args.time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error("preset_validation_params.max_episodes_to_achieve_reward: {}".format(
                preset_validation_params.max_episodes_to_achieve_reward), crash=False)
            screen.error("preset_validation_params.min_reward_threshold: {}".format(
                preset_validation_params.min_reward_threshold), crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False)
            screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
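
# The rolling average above uses np.convolve in 'valid' mode; a small worked
# example with illustrative values and a window of 2:
#
#   np.convolve([0., 1., 2., 3.], np.ones(2) / 2, mode='valid')
#   # -> array([0.5, 1.5, 2.5])
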
def perform_trace_based_tests(args, preset_name, num_env_steps, level=None):
    """
    Run a preset for a fixed number of environment steps and compare the resulting
    csv against a stored trace. If no trace exists yet, create one.

    :return: True if the run reproduced the stored trace (or a new trace was created)
    """
    test_name = '__test_trace'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate process
    screen.log_title("Running test {}{}".format(preset_name, (' - ' + level) if level else ''))
    log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} '
           '-e {test_name} '
           '--seed 42 '
           '-c '
           '--no_summary '
           '-cp {custom_param} '
           '{level} '
           '&> {log_file_name} ').format(
               preset_name=preset_name,
               test_name=test_name,
               log_file_name=log_file_name,
               level='-lvl ' + level if level else '',
               custom_param='"improve_steps=EnvironmentSteps({n});'
                            'steps_between_evaluation_periods=EnvironmentSteps({n});'
                            'evaluation_steps=EnvironmentSteps(1);'
                            'heatup_steps=EnvironmentSteps(1024)"'.format(n=num_env_steps))

    p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode), crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join(
            './rl_coach', 'traces',
            (preset_name + '_' + level.replace(':', '_')) if level else preset_name, '')
        if not os.path.exists(trace_path):
            screen.log('No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(os.path.dirname(trace_path))
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            # round-trip through csv so both frames were parsed the same way
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path),
                                 crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
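
# Example invocation (a sketch; 'CartPole_DQN' stands in for any preset name and
# 'args' for the argparse namespace the test runner builds, with at least
# .verbose and .overwrite set):
#
#   perform_trace_based_tests(args, 'CartPole_DQN', num_env_steps=2000)
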
def test_preset_reward(preset_name, no_progress_bar=True, time_limit=60 * 60, verbose=True):
    """
    Pytest-style variant of the reward test: run a preset and poll its results csv
    until the rolling average of the evaluation reward crosses the preset's minimum
    threshold. Raises ValueError if the threshold is not reached in time.
    """
    preset_validation_params = validation_params(preset_name)

    win_size = 10
    test_name = '__test_reward_{}'.format(preset_name)
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate process
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
    cmd = [
        'python3', 'rl_coach/coach.py',
        '-p', preset_name,
        '-e', test_name,
        '-n', str(preset_validation_params.num_workers),
        '--seed', '0',
        '-c'
    ]
    if preset_validation_params.reward_test_level:
        cmd += ['-lvl', preset_validation_params.reward_test_level]

    stdout = open(log_file_name, 'w')
    p = subprocess.Popen(cmd, stdout=stdout, stderr=stdout)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]
        last_num_episodes = 0

        if not no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes,
                           preset_validation_params, start_time, time_limit)

        while csv is None or (csv['Episode #'].values[-1]
                              < preset_validation_params.max_episodes_to_achieve_reward
                              and time.time() - start_time < time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except Exception:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                averaged_rewards = np.convolve(
                    rewards, np.ones(min(len(rewards), win_size)) / win_size, mode='valid')
            else:
                time.sleep(1)
                continue

            if not no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes,
                               preset_validation_params, start_time, time_limit)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if the reward is high enough
            if np.any(averaged_rewards >= preset_validation_params.min_reward_threshold):
                test_passed = True
                break

            time.sleep(1)

    # kill the test and print the result
    p.kill()
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error("preset_validation_params.max_episodes_to_achieve_reward: {}".format(
                preset_validation_params.max_episodes_to_achieve_reward), crash=False)
            screen.error("preset_validation_params.min_reward_threshold: {}".format(
                preset_validation_params.min_reward_threshold), crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False)
            screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)

    if not test_passed:
        raise ValueError('golden test failed')
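
# This raises on failure, so it plugs straight into a parametrized pytest run;
# a minimal sketch (the preset name and time limit are placeholders):
#
#   import pytest
#
#   @pytest.mark.parametrize('preset_name', ['CartPole_DQN'])
#   def test_golden(preset_name):
#       test_preset_reward(preset_name, time_limit=30 * 60)
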
def wait_and_check(args, processes, force=False):
    """
    Wait for the oldest queued test process to finish and compare its csv against
    the stored trace. 'processes' holds (test_path, log_file_name, Popen) tuples.

    :return: True/False for the checked test, or None if nothing was checked
    """
    if not force and len(processes) < args.max_threads:
        return None

    test_path = processes[0][0]
    test_name = test_path.split('/')[-1]
    log_file_name = processes[0][1]
    p = processes[0][2]
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    # test_name[13:] slices off the experiment-name prefix (presumably
    # '__test_trace_', 13 chars) to recover the preset name
    screen.log('Results for {}: '.format(test_name[13:]))
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode), crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join('./rl_coach', 'traces', test_name[13:])
        if not os.path.exists(trace_path):
            screen.log('No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(trace_path)
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            try:
                df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            except Exception:
                # failing to write a brand new trace should not kill the test run
                pass
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            # round-trip through csv so both frames were parsed the same way
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path),
                                 crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    processes.pop(0)
    return test_passed