def run_graph_manager(self, graph_manager: 'GraphManager', args: argparse.Namespace):
    """
    Validate the parsed arguments against the preset's algorithm settings,
    prepare the process environment, build the TaskParameters and dispatch
    the run (distributed worker/orchestrator, single- or multi-threaded).

    :param graph_manager: the graph manager created from the chosen preset
    :param args: the parsed command line arguments
    """
    # distributed Coach requires the algorithm to declare a sync type;
    # screen.error presumably aborts the process here (other calls pass
    # crash=False explicitly, suggesting the default crashes — confirm)
    if args.distributed_coach and not graph_manager.agent_params.algorithm.distributed_coach_synchronization_type:
        screen.error(
            "{} algorithm is not supported using distributed Coach.".
            format(graph_manager.agent_params.algorithm))

    # SYNC mode checkpoints every training iteration, so a user-supplied
    # checkpoint interval is meaningless — warn but continue
    if args.distributed_coach and args.checkpoint_save_secs and graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
        screen.warning(
            "The --checkpoint_save_secs or -s argument will be ignored as SYNC distributed coach sync type is used. Checkpoint will be saved every training iteration."
        )

    # ASYNC mode relies on timed checkpoints to publish policies, so the
    # interval is mandatory
    if args.distributed_coach and not args.checkpoint_save_secs and graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.ASYNC:
        screen.error(
            "Distributed coach with ASYNC distributed coach sync type requires --checkpoint_save_secs or -s."
        )

    # Intel optimized TF seems to run significantly faster when limiting to a single OMP thread.
    # This will not affect GPU runs.
    os.environ["OMP_NUM_THREADS"] = "1"

    # turn TF debug prints off
    if args.framework == Frameworks.tensorflow:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_verbosity)

    # turn off the summary at the end of the run if necessary
    if not args.no_summary and not args.distributed_coach:
        atexit.register(logger.summarize_experiment)
        screen.change_terminal_title(args.experiment_name)

    # bundle all run-level settings for the graph manager / workers
    task_parameters = TaskParameters(
        framework_type=args.framework,
        evaluate_only=args.evaluate,
        experiment_path=args.experiment_path,
        seed=args.seed,
        use_cpu=args.use_cpu,
        checkpoint_save_secs=args.checkpoint_save_secs,
        checkpoint_restore_dir=args.checkpoint_restore_dir,
        checkpoint_save_dir=args.checkpoint_save_dir,
        export_onnx_graph=args.export_onnx_graph,
        apply_stop_condition=args.apply_stop_condition)

    # open dashboard
    if args.open_dashboard:
        open_dashboard(args.experiment_path)

    # distributed worker roles (trainer / rollout) handle their own run loop
    if args.distributed_coach and args.distributed_coach_run_type != RunType.ORCHESTRATOR:
        handle_distributed_coach_tasks(graph_manager, args, task_parameters)
        return

    # the orchestrator only schedules the other roles — it does not train
    if args.distributed_coach and args.distributed_coach_run_type == RunType.ORCHESTRATOR:
        handle_distributed_coach_orchestrator(args)
        return

    # Single-threaded runs
    if args.num_workers == 1:
        self.start_single_threaded(task_parameters, graph_manager, args)
    else:
        self.start_multi_threaded(graph_manager, args)
def expand_preset(self, preset):
    """
    Replace a short preset name with the full python path, and verify that it can be imported.

    :param preset: either a known preset name or a "path/to/file.py:variable" spec
    :return: the fully-qualified preset spec ("<path>.py:<graph_manager_variable>")
    """
    known_presets = [p.lower() for p in list_all_presets()]
    if preset.lower() in known_presets:
        # a short name of a bundled preset -> expand to its full module path
        preset = "{}.py:graph_manager".format(
            os.path.join(get_base_dir(), 'presets', preset))
    else:
        preset = "{}".format(preset)
        # when no graph manager variable was given, fall back to :graph_manager
        if ":" not in preset:
            preset += ":graph_manager"
        # the referenced preset file must actually exist on disk
        module_path = preset.split(":")[0]
        if not os.path.exists(module_path):
            screen.error(
                "The given preset ({}) cannot be found.".format(preset))

    # finally, make sure the preset module can be imported and instantiated
    try:
        short_dynamic_import(preset, ignore_module_case=True)
    except TypeError as e:
        traceback.print_exc()
        screen.error('Internal Error: ' + str(e) +
                     "\n\nThe given preset ({}) cannot be instantiated.".
                     format(preset))

    return preset
def run_graph_manager(self, graph_manager: 'GraphManager', args: argparse.Namespace):
    """
    Prepare the process environment for a run and dispatch the graph manager
    to the appropriate runner (distributed role, single- or multi-threaded).

    :param graph_manager: the graph manager created from the chosen preset
    :param args: the parsed command line arguments
    """
    # distributed Coach requires the algorithm to declare a sync type
    if args.distributed_coach and not graph_manager.agent_params.algorithm.distributed_coach_synchronization_type:
        screen.error("{} algorithm is not supported using distributed Coach.".format(graph_manager.agent_params.algorithm))

    # Intel optimized TF seems to run significantly faster when limiting to a single OMP thread.
    # This will not affect GPU runs.
    os.environ["OMP_NUM_THREADS"] = "1"

    # turn TF debug prints off
    if args.framework == Frameworks.tensorflow:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_verbosity)

    # turn off the summary at the end of the run if necessary
    if not args.no_summary and not args.distributed_coach:
        atexit.register(logger.summarize_experiment)
        screen.change_terminal_title(args.experiment_name)

    # open dashboard
    if args.open_dashboard:
        open_dashboard(args.experiment_path)

    # distributed worker roles (trainer / rollout) handle their own run loop
    if args.distributed_coach and args.distributed_coach_run_type != RunType.ORCHESTRATOR:
        handle_distributed_coach_tasks(graph_manager, args)
        return

    # the orchestrator only schedules the other roles — it does not train
    if args.distributed_coach and args.distributed_coach_run_type == RunType.ORCHESTRATOR:
        handle_distributed_coach_orchestrator(graph_manager, args)
        return

    # Single-threaded runs
    if args.num_workers == 1:
        self.start_single_threaded(graph_manager, args)
    else:
        self.start_multi_threaded(graph_manager, args)
def test_preset_n_and_ew(preset_args, clres, start_time=None,
                         time_limit=Def.TimeOuts.test_time_limit):
    """
    Test command arguments - check evaluation worker with number of workers

    :param preset_args: preset name (pytest fixture)
    :param clres: fixture holding the stdout/stderr log file of the run
    :param start_time: reference time for the timeout; defaults to the time of
                       the call (the old ``time.time()`` default was evaluated
                       once at import, shrinking the budget of late-running tests)
    :param time_limit: per-test timeout in seconds
    """
    if start_time is None:
        # bind at call time, not at module-import time
        start_time = time.time()

    ew_flag = ['-ew']
    n_flag = ['-n', Def.Flags.enw]
    p_valid_params = p_utils.validation_params(preset_args)
    run_cmd = [
        'python3', 'rl_coach/coach.py',
        '-p', '{}'.format(preset_args),
        '-e', '{}'.format("ExpName_" + preset_args),
    ]

    # add flags to run command
    test_ew_flag = a_utils.add_one_flag_value(flag=ew_flag)
    test_n_flag = a_utils.add_one_flag_value(flag=n_flag)
    run_cmd.extend(test_ew_flag)
    run_cmd.extend(test_n_flag)
    print(str(run_cmd))

    try:
        proc = subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout)
        try:
            a_utils.validate_arg_result(flag=test_ew_flag,
                                        p_valid_params=p_valid_params,
                                        clres=clres, process=proc,
                                        start_time=start_time,
                                        timeout=time_limit)
            a_utils.validate_arg_result(flag=test_n_flag,
                                        p_valid_params=p_valid_params,
                                        clres=clres, process=proc,
                                        start_time=start_time,
                                        timeout=time_limit)
        except AssertionError:
            # close process once get assert false
            proc.kill()
            # if test failed - print logs (close the log file handle afterwards)
            with open(clres.stdout.name) as log_file:
                screen.error(log_file.read(), crash=False)
            assert False
    except OSError as e:
        # if test launch failed due to OSError - skip test
        # (pytest.skip expects a string reason, not an exception object)
        pytest.skip(str(e))

    proc.kill()
def test_preset_seed(preset_args_for_seed, clres, start_time=None,
                     time_limit=Def.TimeOuts.test_time_limit):
    """
    Test command arguments - the test will check seed argument with all presets

    :param preset_args_for_seed: preset name (pytest fixture)
    :param clres: fixture holding the stdout/stderr log file of the run
    :param start_time: reference time for the timeout; defaults to the time of
                       the call (the old ``time.time()`` default was evaluated
                       once at import, shrinking the budget of late-running tests)
    :param time_limit: per-test timeout in seconds
    """
    if start_time is None:
        # bind at call time, not at module-import time
        start_time = time.time()

    def close_processes():
        """
        close all processes that still active in the process list
        """
        for i in range(seed_num):
            proc[i].kill()

    proc = []
    seed_num = 2
    flag = ["--seed", str(seed_num)]
    p_valid_params = p_utils.validation_params(preset_args_for_seed)
    run_cmd = [
        'python3', 'rl_coach/coach.py',
        '-p', '{}'.format(preset_args_for_seed),
        '-e', '{}'.format("ExpName_" + preset_args_for_seed),
    ]
    if p_valid_params.trace_test_levels:
        lvl = ['-lvl', '{}'.format(p_valid_params.trace_test_levels[0])]
        run_cmd.extend(lvl)

    # add flags to run command
    test_flag = a_utils.add_one_flag_value(flag=flag)
    run_cmd.extend(test_flag)
    print(str(run_cmd))

    # launch the same seeded run several times; identical seeds should validate
    for _ in range(seed_num):
        proc.append(
            subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout))

    try:
        a_utils.validate_arg_result(flag=test_flag,
                                    p_valid_params=p_valid_params,
                                    clres=clres, process=proc,
                                    start_time=start_time,
                                    timeout=time_limit)
    except AssertionError:
        close_processes()
        # if test failed - print logs (close the log file handle afterwards)
        with open(clres.stdout.name) as log_file:
            screen.error(log_file.read(), crash=False)
        assert False

    close_processes()
def test_preset_args(preset_args, flag, clres, start_time=None,
                     time_limit=Def.TimeOuts.test_time_limit):
    """
    Test command arguments - the test will check all flags one-by-one.

    :param preset_args: preset name (pytest fixture)
    :param flag: the command line flag under test, e.g. ``['-cp', ...]``
    :param clres: fixture holding the stdout/stderr log file of the run
    :param start_time: reference time for the timeout; defaults to the time of
                       the call (the old ``time.time()`` default was evaluated
                       once at import, shrinking the budget of late-running tests)
    :param time_limit: per-test timeout in seconds
    """
    if start_time is None:
        # bind at call time, not at module-import time
        start_time = time.time()

    p_valid_params = p_utils.validation_params(preset_args)
    run_cmd = [
        'python3', 'rl_coach/coach.py',
        '-p', '{}'.format(preset_args),
        '-e', '{}'.format("ExpName_" + preset_args),
    ]
    if p_valid_params.reward_test_level:
        lvl = ['-lvl', '{}'.format(p_valid_params.reward_test_level)]
        run_cmd.extend(lvl)

    # add flags to run command
    test_flag = a_utils.add_one_flag_value(flag=flag)
    # custom-parameter runs are seeded so the result is reproducible
    if flag[0] == "-cp":
        seed = ['--seed', '42']
        seed_flag = a_utils.add_one_flag_value(flag=seed)
        run_cmd.extend(seed_flag)
    run_cmd.extend(test_flag)
    print(str(run_cmd))

    try:
        proc = subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout)
        try:
            a_utils.validate_arg_result(flag=test_flag,
                                        p_valid_params=p_valid_params,
                                        clres=clres, process=proc,
                                        start_time=start_time,
                                        timeout=time_limit)
        except AssertionError:
            # close process once get assert false
            proc.kill()
            # if test failed - print logs (close the log file handle afterwards)
            with open(clres.stdout.name) as log_file:
                screen.error(log_file.read(), crash=False)
            assert False
    except OSError as e:
        # if test launch failed due to OSError - skip test
        # (pytest.skip expects a string reason, not an exception object)
        pytest.skip(str(e))

    proc.kill()
def training_worker(graph_manager, task_parameters, is_multi_node_test): """ restore a checkpoint then perform rollouts using the restored model :param graph_manager: An instance of the graph manager :param task_parameters: An instance of task parameters :param is_multi_node_test: If this is a multi node test insted of a normal run. """ # initialize graph graph_manager.create_graph(task_parameters) # save randomly initialized graph graph_manager.save_checkpoint() # training loop steps = 0 # evaluation offset eval_offset = 1 graph_manager.setup_memory_backend() while steps < graph_manager.improve_steps.num_steps: graph_manager.phase = core_types.RunPhase.TRAIN if is_multi_node_test and graph_manager.get_current_episodes_count( ) > graph_manager.preset_validation_params.max_episodes_to_achieve_reward: # Test failed as it has not reached the required success rate graph_manager.flush_finished() screen.error( "Could not reach required success by {} episodes.".format( graph_manager.preset_validation_params. max_episodes_to_achieve_reward), crash=True) graph_manager.fetch_from_worker( graph_manager.agent_params.algorithm.num_consecutive_playing_steps) graph_manager.phase = core_types.RunPhase.UNDEFINED if graph_manager.should_train(): steps += 1 graph_manager.phase = core_types.RunPhase.TRAIN graph_manager.train() graph_manager.phase = core_types.RunPhase.UNDEFINED if steps * graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps > graph_manager.steps_between_evaluation_periods.num_steps * eval_offset: eval_offset += 1 if graph_manager.evaluate(graph_manager.evaluation_steps): break if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC: graph_manager.save_checkpoint() else: graph_manager.occasionally_save_checkpoint()
def test_preset_runs(preset):
    """Launch the preset through coach.py and verify the process survives its
    first 10 seconds of initialization."""
    print("Testing preset {}".format(preset))

    # TODO: this is a temporary workaround for presets which define more than a single available level.
    # we should probably do this in a more robust way
    default_levels = (("Atari", "breakout"),
                      ("Mujoco", "inverted_pendulum"),
                      ("ControlSuite", "pendulum:swingup"),
                      ("Lab", "nav_maze_static_01"))
    level = ""
    for family, family_level in default_levels:
        if family in preset:
            level = family_level
            break

    experiment_name = ".test-" + preset
    params = [
        sys.executable, "rl_coach/coach.py", "-p", preset, "-ns", "-e",
        experiment_name
    ]
    if level != "":
        params += ["-lvl", level]

    launched = Popen(params)

    # wait 10 seconds overhead of initialization etc.
    time.sleep(10)

    # a still-running process (poll() is None) means startup succeeded
    test_failed = launched.poll() is not None
    if test_failed:
        screen.error("{} failed".format(preset), crash=False)
    else:
        screen.success("{} passed successfully".format(preset))

    launched.kill()

    experiment_dir = "experiments/" + experiment_name
    if os.path.exists(experiment_dir):
        shutil.rmtree(experiment_dir)

    assert not test_failed
def test_preset_runs(preset):
    """Launch the preset with a shortened heatup stage and verify the process
    is still alive after 30 seconds (initialization + heatup)."""
    print("Testing preset {}".format(preset))

    # TODO: this is a temporary workaround for presets which define more than a single available level.
    # we should probably do this in a more robust way
    default_levels = (("Atari", "breakout"),
                      ("Mujoco", "inverted_pendulum"),
                      ("ControlSuite", "pendulum:swingup"))
    level = ""
    for family, family_level in default_levels:
        if family in preset:
            level = family_level
            break

    experiment_name = ".test-" + preset

    # overriding heatup steps to some small number of steps (1000), so to finish the heatup stage, and get to train
    params = [
        "python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e",
        experiment_name, '-cp', 'heatup_steps=EnvironmentSteps(1000)'
    ]
    if level != "":
        params += ["-lvl", level]

    launched = Popen(params)

    # wait 30 seconds overhead of initialization, and finishing heatup.
    time.sleep(30)

    # a still-running process (poll() is None) means startup succeeded
    test_failed = launched.poll() is not None
    if test_failed:
        screen.error("{} failed".format(preset), crash=False)
    else:
        screen.success("{} passed successfully".format(preset))

    launched.kill()

    experiment_dir = "experiments/" + experiment_name
    if os.path.exists(experiment_dir):
        shutil.rmtree(experiment_dir)

    assert not test_failed
def test_all_presets_are_running():
    """
    Smoke-test every preset under rl_coach/presets: launch each through
    coach.py and verify the process is still alive after a 10 second
    initialization grace period.
    """
    test_failed = False
    # strip only the trailing '.py' — consistent with how the other test
    # entry points enumerate presets; split('.')[0] would mangle preset
    # names that contain a dot
    all_presets = sorted(f[:-3] for f in os.listdir('rl_coach/presets')
                         if f.endswith('.py') and f != '__init__.py')
    for preset in all_presets:
        print("Testing preset {}".format(preset))

        # TODO: this is a temporary workaround for presets which define more than a single available level.
        # we should probably do this in a more robust way
        level = ""
        if "Atari" in preset:
            level = "breakout"
        elif "Mujoco" in preset:
            level = "inverted_pendulum"
        elif "ControlSuite" in preset:
            level = "pendulum:swingup"

        params = ["python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e", ".test"]
        if level != "":
            params += ["-lvl", level]

        p = Popen(params, stdout=DEVNULL)

        # wait 10 seconds overhead of initialization etc.
        time.sleep(10)
        return_value = p.poll()

        # a still-running process (poll() is None) means startup succeeded
        if return_value is None:
            screen.success("{} passed successfully".format(preset))
        else:
            test_failed = True
            screen.error("{} failed".format(preset), crash=False)
        p.kill()

    if os.path.exists("experiments/.test"):
        shutil.rmtree("experiments/.test")

    assert not test_failed
def perform_reward_based_tests(args, preset_validation_params, preset_name):
    """
    Run a preset as a subprocess and poll its results csv until the windowed
    average evaluation reward crosses the preset's threshold, the episode
    budget is exhausted, or the time limit expires.

    :param args: parsed command line arguments (uses time_limit, verbose,
                 no_progress_bar)
    :param preset_validation_params: the preset's validation parameters
                 (reward threshold, episode budget, workers, test level)
    :param preset_name: name of the preset to run
    :return: True if the reward threshold was reached, False otherwise
    """
    # moving-average window (in episodes) used to smooth evaluation rewards
    win_size = 10
    test_name = '__test_reward'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)
    # '&>' redirection requires bash — matches executable="/bin/bash" below
    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} '
           '-e {test_name} '
           '-n {num_workers} '
           '--seed 0 '
           '-c '
           '{level} '
           '&> {log_file_name} ').format(
               preset_name=preset_name,
               test_name=test_name,
               num_workers=preset_validation_params.num_workers,
               log_file_name=log_file_name,
               level='-lvl ' + preset_validation_params.reward_test_level
               if preset_validation_params.reward_test_level else '')

    # os.setsid puts the run in its own process group so killpg below can
    # terminate the whole tree (coach + workers)
    p = subprocess.Popen(cmd,
                         shell=True,
                         executable="/bin/bash",
                         preexec_fn=os.setsid)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    # multi-worker runs write one csv per worker; track worker 0 only
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    # NOTE(review): this looks up the csv only once, right after launch —
    # presumably read_csv_paths blocks/retries until a csv appears; confirm
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not args.no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes,
                           preset_validation_params, start_time, args)

        # poll the csv until the episode budget or the time limit is hit
        while csv is None or (
                csv['Episode #'].values[-1] <
                preset_validation_params.max_episodes_to_achieve_reward
                and time.time() - start_time < args.time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            # the reward column only appears after the first evaluation
            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                # moving average over up to win_size evaluations
                averaged_rewards = np.convolve(
                    rewards,
                    np.ones(min(len(rewards), win_size)) / win_size,
                    mode='valid')
            else:
                time.sleep(1)
                continue

            if not args.no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes,
                               preset_validation_params, start_time, args)

            # only re-check the threshold when new episodes were logged
            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue
            last_num_episodes = csv['Episode #'].values[-1]

            # check if reward is enough
            if np.any(averaged_rewards >=
                      preset_validation_params.min_reward_threshold):
                test_passed = True
                break
            time.sleep(1)

    # kill test and print result
    os.killpg(os.getpgid(p.pid), signal.SIGTERM)
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        # distinguish the three failure modes: timeout, insufficient reward,
        # or the run never produced a results csv at all
        if time.time() - start_time > args.time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error(
                "preset_validation_params.max_episodes_to_achieve_reward: {}".
                format(
                    preset_validation_params.max_episodes_to_achieve_reward),
                crash=False)
            screen.error(
                "preset_validation_params.min_reward_threshold: {}".format(
                    preset_validation_params.min_reward_threshold),
                crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards),
                         crash=False)
            screen.error("episode number: {}".format(
                csv['Episode #'].values[-1]),
                         crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)

    return test_passed
def main():
    """
    Entry point for the preset test runner: parse the command line, collect
    the presets to test, then run either trace-based or reward-based tests
    for each and print a pass/fail summary.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--trace',
                        help="(flag) perform trace based testing",
                        action='store_true')
    parser.add_argument(
        '-p', '--preset',
        help="(string) Name of a preset to run (as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-ip', '--ignore_presets',
        help=
        "(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-v', '--verbose',
        help="(flag) display verbose logs in the event of an error",
        action='store_true')
    parser.add_argument(
        '--stop_after_first_failure',
        help="(flag) stop executing tests after the first error",
        action='store_true')
    parser.add_argument(
        '-tl', '--time_limit',
        help="time limit for each test in minutes",
        default=
        40,  # setting time limit to be so high due to DDPG being very slow - its tests are long
        type=int)
    parser.add_argument(
        '-np', '--no_progress_bar',
        help=
        "(flag) Don't print the progress bar (makes jenkins logs more readable)",
        action='store_true')
    parser.add_argument(
        '-ow', '--overwrite',
        help="(flag) overwrite old trace with new ones in trace testing mode",
        action='store_true')

    args = parser.parse_args()
    if args.preset is not None:
        presets_lists = [args.preset]
    else:
        # presets_lists = list_all_classes_in_module(presets)
        presets_lists = [
            f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
            if f[-3:] == '.py' and not f == '__init__.py'
        ]

    fail_count = 0
    test_count = 0

    # convert minutes (CLI) to seconds (used by the test helpers)
    args.time_limit = 60 * args.time_limit

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []
    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            # a preset that fails to import counts as a failed test
            try:
                preset = import_module(
                    'rl_coach.presets.{}'.format(preset_name))
            except:
                screen.error("Failed to load preset <{}>".format(preset_name),
                             crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            # skip presets not flagged for testing (unless trace mode)
            if not args.trace and not preset_validation_params.test:
                continue

            if args.trace:
                num_env_steps = preset_validation_params.trace_max_env_steps
                # a preset may declare several levels — trace-test each one
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_passed = perform_trace_based_tests(
                            args, preset_name, num_env_steps, level)
                        if not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_passed = perform_trace_based_tests(
                        args, preset_name, num_env_steps)
                    if not test_passed:
                        fail_count += 1
            else:
                # NOTE(review): test_count is not incremented for reward-based
                # tests, so the summary under-counts them — confirm intent
                test_passed = perform_reward_based_tests(
                    args, preset_validation_params, preset_name)
                if not test_passed:
                    fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" +
                       str(test_count) + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" +
                     str(test_count) + " tests passed successfully")
def perform_trace_based_tests(args, preset_name, num_env_steps, level=None):
    """
    Run a preset deterministically (fixed seed, fixed step counts) and compare
    its results csv against a stored golden trace. On first run — or with
    --overwrite — the produced csv becomes the new golden trace.

    :param args: parsed command line arguments (uses verbose, overwrite)
    :param preset_name: name of the preset to run
    :param num_env_steps: number of environment steps to run and evaluate over
    :param level: optional environment level to pass via -lvl
    :return: True if the trace matched (or a new trace was created)
    """
    test_name = '__test_trace'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}{}".format(preset_name, ' - ' +
                                                level if level else ''))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)
    # '&>' redirection requires bash — matches executable="/bin/bash" below;
    # the -cp override pins all step counts so the run is reproducible
    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} '
           '-e {test_name} '
           '--seed 42 '
           '-c '
           '--no_summary '
           '-cp {custom_param} '
           '{level} '
           '&> {log_file_name} ').format(
               preset_name=preset_name,
               test_name=test_name,
               log_file_name=log_file_name,
               level='-lvl ' + level if level else '',
               custom_param='\"improve_steps=EnvironmentSteps({n});'
               'steps_between_evaluation_periods=EnvironmentSteps({n});'
               'evaluation_steps=EnvironmentSteps(1);'
               'heatup_steps=EnvironmentSteps(1024)\"'.format(n=num_env_steps))

    # os.setsid isolates the run in its own process group
    p = subprocess.Popen(cmd,
                         shell=True,
                         executable="/bin/bash",
                         preexec_fn=os.setsid)
    # unlike the reward-based test, trace runs are short — wait to completion
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode),
                         crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        # the conditional expression spans the whole concatenation:
        # (preset_name + '_' + level...) if level else preset_name
        trace_path = os.path.join(
            './rl_coach', 'traces', preset_name + '_' +
            level.replace(':', '_') if level else preset_name, '')
        if not os.path.exists(trace_path):
            # first run for this preset/level — record the golden trace
            screen.log(
                'No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(os.path.dirname(trace_path))
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            # round-trip through csv so both frames compare on equal footing
            # (identical dtypes/serialization)
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
            trace_csv_path = trace_csv_path[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
                test_passed = True
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    # replace the golden trace with the new results
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    # print a ready-to-run diff command for manual inspection
                    screen.error("bcompare {} {}".format(
                        trace_csv_path, new_trace_csv_path),
                                 crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
def main():
    """
    Entry point for the reward-based preset test runner: parse the command
    line, collect the presets to test, run each preset's reward test and
    print a pass/fail summary.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p', '--preset', '--presets',
        help=
        "(string) Name of preset(s) to run (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-ip', '--ignore_presets',
        help=
        "(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-v', '--verbose',
        help="(flag) display verbose logs in the event of an error",
        action='store_true')
    parser.add_argument(
        '--stop_after_first_failure',
        help="(flag) stop executing tests after the first error",
        action='store_true')
    parser.add_argument(
        '-tl', '--time_limit',
        help="time limit for each test in minutes",
        default=
        60,  # setting time limit to be so high due to DDPG being very slow - its tests are long
        type=int)
    parser.add_argument(
        '-np', '--no_progress_bar',
        help=
        "(flag) Don't print the progress bar (makes jenkins logs more readable)",
        action='store_true')

    args = parser.parse_args()
    if args.preset is not None:
        presets_lists = args.preset.split(',')
    else:
        presets_lists = all_presets()

    fail_count = 0
    test_count = 0

    # convert minutes (CLI) to seconds (used by the test helpers)
    args.time_limit = 60 * args.time_limit

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []
    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            print("Attempting to run Preset: %s" % preset_name)
            # a preset that fails to import counts as a failed test
            if not importable(preset_name):
                screen.error("Failed to load preset <{}>".format(preset_name),
                             crash=False)
                fail_count += 1
                test_count += 1
                continue

            # skip presets that do not declare test parameters
            if not has_test_parameters(preset_name):
                continue

            test_count += 1
            # test_preset_reward presumably raises on failure — any
            # exception is counted as a failed test
            try:
                test_preset_reward(preset_name, args.no_progress_bar,
                                   args.time_limit, args.verbose)
            except Exception as e:
                fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" +
                       str(test_count) + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" +
                     str(test_count) + " tests passed successfully")
def parse_arguments(parser: argparse.ArgumentParser) -> argparse.Namespace:
    """
    Parse the arguments that the user entered

    :param parser: the argparse command line parser
    :return: the parsed arguments
    """
    args = parser.parse_args()

    # if no arg is given
    if len(sys.argv) == 1:
        parser.print_help()
        exit(0)

    # list available presets
    preset_names = list_all_presets()
    if args.list:
        screen.log_title("Available Presets:")
        for preset in sorted(preset_names):
            print(preset)
        sys.exit(0)

    # replace a short preset name with the full path
    if args.preset is not None:
        if args.preset.lower() in [p.lower() for p in preset_names]:
            args.preset = "{}.py:graph_manager".format(
                os.path.join(get_base_dir(), 'presets', args.preset))
        else:
            args.preset = "{}".format(args.preset)
            # if a graph manager variable was not specified, try the default of :graph_manager
            if len(args.preset.split(":")) == 1:
                args.preset += ":graph_manager"

            # verify that the preset exists
            preset_path = args.preset.split(":")[0]
            if not os.path.exists(preset_path):
                screen.error("The given preset ({}) cannot be found.".format(
                    args.preset))

        # verify that the preset can be instantiated
        try:
            short_dynamic_import(args.preset, ignore_module_case=True)
        except TypeError as e:
            traceback.print_exc()
            screen.error('Internal Error: ' + str(e) +
                         "\n\nThe given preset ({}) cannot be instantiated.".
                         format(args.preset))

    # validate the checkpoints args
    if args.checkpoint_restore_dir is not None and not os.path.exists(
            args.checkpoint_restore_dir):
        screen.error(
            "The requested checkpoint folder to load from does not exist.")

    # no preset was given. check if the user requested to play some environment on its own
    if args.preset is None and args.play:
        if args.environment_type:
            # human-controlled play uses a dedicated agent type
            args.agent_type = 'Human'
        else:
            screen.error(
                'When no preset is given for Coach to run, and the user requests human control over '
                'the environment, the user is expected to input the desired environment_type and level.'
                '\nAt least one of these parameters was not given.')
    elif args.preset and args.play:
        screen.error(
            "Both the --preset and the --play flags were set. These flags can not be used together. "
            "For human control, please use the --play flag together with the environment type flag (-et)"
        )
    elif args.preset is None and not args.play:
        screen.error(
            "Please choose a preset using the -p flag or use the --play flag together with choosing an "
            "environment type (-et) in order to play the game.")

    # get experiment name and path
    args.experiment_name = logger.get_experiment_name(args.experiment_name)
    args.experiment_path = logger.get_experiment_path(args.experiment_name)

    if args.play and args.num_workers > 1:
        screen.warning(
            "Playing the game as a human is only available with a single worker. "
            "The number of workers will be reduced to 1")
        args.num_workers = 1

    # map the framework name string to its enum member
    args.framework = Frameworks[args.framework.lower()]

    # checkpoints — only set a save dir when periodic saving was requested
    args.save_checkpoint_dir = os.path.join(
        args.experiment_path,
        'checkpoint') if args.save_checkpoint_secs is not None else None

    return args
def training_worker(graph_manager, task_parameters, data_store, is_multi_node_test):
    """
    restore a checkpoint then perform rollouts using the restored model

    :param graph_manager: An instance of the graph manager
    :param data_store: An instance of DataStore which can be used to communicate policies to roll out workers
    :param task_parameters: An instance of task parameters
    :param is_multi_node_test: If this is a multi node test instead of a normal run.
    """
    # Load checkpoint if provided
    if task_parameters.checkpoint_restore_path:
        data_store_ckpt_load(data_store)
        # initialize graph
        graph_manager.create_graph(task_parameters)
    else:
        # initialize graph
        graph_manager.create_graph(task_parameters)
        # save randomly initialized graph
        data_store.save_policy(graph_manager)

    # training loop
    steps = 0

    # evaluation offset
    eval_offset = 1

    graph_manager.setup_memory_backend()
    graph_manager.signal_ready()

    while steps < graph_manager.improve_steps.num_steps:
        # in multi-node tests, abort once the episode budget is exhausted
        # without reaching the target (screen.error with crash=True aborts)
        if is_multi_node_test and graph_manager.get_current_episodes_count(
        ) > graph_manager.preset_validation_params.max_episodes_to_achieve_reward:
            # Test failed as it has not reached the required success rate
            graph_manager.flush_finished()
            screen.error(
                "Could not reach required success by {} episodes.".format(
                    graph_manager.preset_validation_params.
                    max_episodes_to_achieve_reward),
                crash=True)

        # pull freshly-played experience from the rollout workers
        graph_manager.fetch_from_worker(
            graph_manager.agent_params.algorithm.num_consecutive_playing_steps)

        if graph_manager.should_train():
            steps += 1

            graph_manager.train()

            # evaluate periodically; eval_offset tracks which evaluation
            # window we are in so each window triggers exactly once
            if steps * graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps > graph_manager.steps_between_evaluation_periods.num_steps * eval_offset:
                eval_offset += 1
                # evaluate() returning True means the target was reached — stop
                if graph_manager.evaluate(graph_manager.evaluation_steps):
                    break

            # SYNC mode publishes the policy every training iteration
            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                data_store.save_policy(graph_manager)
            else:
                # NOTE: this implementation conflated occasionally saving checkpoints for later use
                # in production with checkpoints saved for communication to rollout workers.
                # TODO: this should be implemented with a new parameter: distributed_coach_synchronization_frequency or similar
                # graph_manager.occasionally_save_checkpoint()
                raise NotImplementedError()
import random import sys from os import path, environ from rl_coach.logger import screen from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter try: if 'CARLA_ROOT' in environ: sys.path.append(path.join(environ.get('CARLA_ROOT'), 'PythonClient')) else: screen.error( "CARLA_ROOT was not defined. Please set it to point to the CARLA root directory and try again." ) from carla.client import CarlaClient from carla.settings import CarlaSettings from carla.tcp import TCPConnectionError from carla.sensor import Camera from carla.client import VehicleControl from carla.planner.planner import Planner from carla.driving_benchmark.experiment_suites.experiment_suite import ExperimentSuite except ImportError: from rl_coach.logger import failed_imports failed_imports.append("CARLA") import logging import subprocess from rl_coach.environments.environment import Environment, EnvironmentParameters, LevelSelection
def main():
    """Entry point of the trace-based preset test runner.

    Parses CLI options, optionally prepares the distributed/S3 config (when
    --update_traces is given), then runs a trace test for every selected
    preset — optionally several in parallel — and prints a pass/fail summary.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--preset', '--presets',
                        help="(string) Name of preset(s) to run (comma separated, as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-ip', '--ignore_presets',
                        help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-v', '--verbose',
                        help="(flag) display verbose logs in the event of an error",
                        action='store_true')
    parser.add_argument('--stop_after_first_failure',
                        help="(flag) stop executing tests after the first error",
                        action='store_true')
    parser.add_argument('-ow', '--overwrite',
                        help="(flag) overwrite old trace with new ones in trace testing mode",
                        action='store_true')
    parser.add_argument('-prl', '--parallel',
                        help="(flag) run tests in parallel",
                        action='store_true')
    parser.add_argument('-ut', '--update_traces',
                        help="(flag) update traces on repository",
                        action='store_true')
    parser.add_argument('-mt', '--max_threads',
                        help="(int) maximum number of threads to run in parallel",
                        default=multiprocessing.cpu_count()-2,
                        type=int)
    parser.add_argument('-i', '--image',
                        help="(string) Name of the testing image",
                        type=str,
                        default=None)
    parser.add_argument('-mb', '--memory_backend',
                        help="(string) Name of the memory backend",
                        type=str,
                        default="redispubsub")
    parser.add_argument('-e', '--endpoint',
                        help="(string) Name of the s3 endpoint",
                        type=str,
                        default='s3.amazonaws.com')
    parser.add_argument('-cr', '--creds_file',
                        help="(string) Path of the s3 creds file",
                        type=str,
                        default='.aws_creds')
    parser.add_argument('-b', '--bucket',
                        help="(string) Name of the bucket for s3",
                        type=str,
                        default=None)
    args = parser.parse_args()

    if args.update_traces:
        # uploading traces requires a reachable S3 bucket and AWS credentials
        if not args.bucket:
            print("bucket_name required for s3")
            exit(1)
        if not os.environ.get('AWS_ACCESS_KEY_ID') or not os.environ.get('AWS_SECRET_ACCESS_KEY'):
            print("AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env vars need to be set")
            exit(1)
        config_file = './tmp.cred'
        generate_config(args.image, args.memory_backend, args.endpoint, args.bucket, args.creds_file, config_file)

    if not args.parallel:
        # sequential mode: one worker at a time
        args.max_threads = 1

    if args.preset is not None:
        presets_lists = args.preset.split(',')
    else:
        # no preset requested -> test every preset module in rl_coach/presets
        presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
                         if f[-3:] == '.py' and not f == '__init__.py']

    fail_count = 0
    test_count = 0

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []

    for preset_name in sorted(presets_lists):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module('rl_coach.presets.{}'.format(preset_name))
            except Exception:
                # narrowed from a bare except so Ctrl-C / SystemExit still propagate;
                # a preset that fails to import counts as a failed test
                screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            num_env_steps = preset_validation_params.trace_max_env_steps
            if preset_validation_params.test_using_a_trace_test:
                if preset_validation_params.trace_test_levels:
                    # one trace test per declared level
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_path, log_file, p = run_trace_based_test(preset_name, num_env_steps, level)
                        processes.append((test_path, log_file, p))
                        test_passed = wait_and_check(args, processes)
                        if test_passed is not None and not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_path, log_file, p = run_trace_based_test(preset_name, num_env_steps)
                    processes.append((test_path, log_file, p))
                    test_passed = wait_and_check(args, processes)
                    if test_passed is not None and not test_passed:
                        fail_count += 1

    # drain whatever is still queued/running
    while len(processes) > 0:
        test_passed = wait_and_check(args, processes, force=True)
        if test_passed is not None and not test_passed:
            fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count)
                       + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count)
                     + " tests passed successfully", crash=False)
def wait_and_check(args, processes, force=False):
    """Wait for the oldest queued trace-test process and evaluate its result.

    Returns None when the queue is not yet full (and force is False), otherwise
    pops the head of ``processes``, compares its csv output against the stored
    trace, and returns True/False for pass/fail. Also cleans up the test's
    experiment directory and log file.

    :param args: parsed CLI namespace (uses max_threads, verbose, overwrite)
    :param processes: FIFO list of (test_path, log_file_name, Popen) tuples
    :param force: when True, wait even if fewer than max_threads are queued
    :return: None (queue not full), True (trace matched) or False (failed)
    """
    if not force and len(processes) < args.max_threads:
        return None

    test_path, log_file_name, p = processes[0]
    test_name = test_path.split('/')[-1]
    p.wait()

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, '*.csv')

    test_passed = False
    screen.log('Results for {}: '.format(test_name[13:]))
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode), crash=False)
            # context manager so the log file handle is not leaked
            with open(log_file_name) as log_file:
                screen.error(log_file.read(), crash=False)
    else:
        trace_path = os.path.join('./rl_coach', 'traces', test_name[13:])
        if not os.path.exists(trace_path):
            # first run for this preset/level: record a fresh golden trace
            screen.log('No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(trace_path)
            df = clean_df(pd.read_csv(csv_paths[0]))
            try:
                df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            except Exception:
                # deliberate best-effort: a failed trace write must not abort the run
                # (narrowed from a bare except so Ctrl-C still propagates)
                pass
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = clean_df(pd.read_csv(csv_paths[0]))
            # round-trip the new results through csv so formatting matches the stored trace
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    # leave both files behind and print a diff command hint
                    screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    processes.pop(0)
    return test_passed
def __init__(self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters,
             additional_simulator_parameters: Dict[str, Any] = None, seed: Union[None, int]=None,
             human_control: bool=False, custom_reward_threshold: Union[int, float]=None,
             random_initialization_steps: int=1, max_over_num_frames: int=1, **kwargs):
    """
    Wrap and initialize a Gym environment for the agent.

    :param level: the gym level to run (can also be a LevelSelection object), e.g. BreakoutDeterministic-v0
    :param frame_skip: number of frames to skip between agent actions (the action is repeated meanwhile)
    :param visualization_parameters: rendering / visualization settings
    :param additional_simulator_parameters: extra kwargs forwarded to the Gym environment's __init__
    :param seed: seed for the environment's and numpy/random RNGs; None leaves them unseeded
    :param custom_reward_threshold: reward considered success; if None, taken from the Gym spec
    :param random_initialization_steps: number of random steps taken after each reset (DQN-style)
    :param max_over_num_frames: number of consecutive frames to take the pixel-wise max over
    """
    super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters)

    self.random_initialization_steps = random_initialization_steps
    self.max_over_num_frames = max_over_num_frames
    self.additional_simulator_parameters = additional_simulator_parameters

    # hide warnings
    gym.logger.set_level(40)

    """
    load and initialize environment
    environment ids can be defined in 3 ways:
    1. Native gym environments like BreakoutDeterministic-v0 for example
    2. Custom gym environments written and installed as python packages.
       This environments should have a python module with a class inheriting gym.Env, implementing the
       relevant functions (_reset, _step, _render) and defining the observation and action space
       For example: my_environment_package:MyEnvironmentClass will run an environment defined in the
       MyEnvironmentClass class
    3. Custom gym environments written as an independent module which is not installed.
       This environments should have a python module with a class inheriting gym.Env, implementing the
       relevant functions (_reset, _step, _render) and defining the observation and action space.
       For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an
       environment defined in the MyEnvironmentClass class which is located in the module in the
       relative path path_to_my_environment.sub_directory.my_module
    """
    if ':' in self.env_id:
        # custom environments
        if '/' in self.env_id or '.' in self.env_id:
            # environment in an absolute path module written as a unix path or in a relative path module
            # written as a python import path
            env_class = short_dynamic_import(self.env_id)
        else:
            # environment in a python package
            env_class = gym.envs.registration.load(self.env_id)

        # instantiate the environment
        if self.additional_simulator_parameters:
            self.env = env_class(**self.additional_simulator_parameters)
        else:
            self.env = env_class()
    else:
        # native gym environment id
        self.env = gym.make(self.env_id)

    # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows
    environment_to_always_use_with_native_rendering = ['classic_control', 'mujoco', 'robotics']
    self.native_rendering = self.native_rendering or \
                            any([env in str(self.env.unwrapped.__class__)
                                 for env in environment_to_always_use_with_native_rendering])
    if self.native_rendering:
        if hasattr(self, 'renderer'):
            # close the renderer the base class may have opened, to avoid a duplicate window
            self.renderer.close()

    # seed
    if self.seed is not None:
        self.env.seed(self.seed)
        np.random.seed(self.seed)
        random.seed(self.seed)

    # frame skip and max between consecutive frames
    self.is_robotics_env = 'robotics' in str(self.env.unwrapped.__class__)
    self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__)
    self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__)
    self.timelimit_env_wrapper = self.env
    if self.is_atari_env:
        self.env.unwrapped.frameskip = 1  # this accesses the atari env that is wrapped with a timelimit wrapper env
        if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4:
            screen.warning("Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. "
                           "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the "
                           "laser rays disappear. To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4.")
            self.frame_skip = 3
        # frameskip/max handled by the wrapper for Atari (flicker removal)
        self.env = MaxOverFramesAndFrameskipEnvWrapper(self.env,
                                                       frameskip=self.frame_skip,
                                                       max_over_num_frames=self.max_over_num_frames)
    else:
        self.env.unwrapped.frameskip = self.frame_skip

    self.state_space = StateSpace({})

    # observations: normalize to a dict of observation spaces
    if not isinstance(self.env.observation_space, gym.spaces.dict_space.Dict):
        state_space = {'observation': self.env.observation_space}
    else:
        state_space = self.env.observation_space.spaces

    for observation_space_name, observation_space in state_space.items():
        if len(observation_space.shape) == 3 and observation_space.shape[-1] == 3:
            # we assume gym has image observations which are RGB and where their values are within 0-255
            self.state_space[observation_space_name] = ImageObservationSpace(
                shape=np.array(observation_space.shape),
                high=255,
                channels_axis=-1
            )
        else:
            # anything non-image is treated as a flat vector observation
            self.state_space[observation_space_name] = VectorObservationSpace(
                shape=observation_space.shape[0],
                low=observation_space.low,
                high=observation_space.high
            )
    if 'desired_goal' in state_space.keys():
        # goal-based (e.g. robotics-style dict) observation space
        self.goal_space = self.state_space['desired_goal']

    # actions
    if type(self.env.action_space) == gym.spaces.box.Box:
        self.action_space = BoxActionSpace(
            shape=self.env.action_space.shape,
            low=self.env.action_space.low,
            high=self.env.action_space.high
        )
    elif type(self.env.action_space) == gym.spaces.discrete.Discrete:
        actions_description = []
        if hasattr(self.env.unwrapped, 'get_action_meanings'):
            actions_description = self.env.unwrapped.get_action_meanings()
        self.action_space = DiscreteActionSpace(
            num_actions=self.env.action_space.n,
            descriptions=actions_description
        )

    if self.human_control:
        # TODO: add this to the action space
        # map keyboard keys to actions
        self.key_to_action = {}
        if hasattr(self.env.unwrapped, 'get_keys_to_action'):
            self.key_to_action = self.env.unwrapped.get_keys_to_action()
        else:
            screen.error("Error: Environment {} does not support human control.".format(self.env), crash=True)

    # initialize the state by getting a new state from the environment
    self.reset_internal_state(True)

    # render
    if self.is_rendered:
        image = self.get_rendered_image()
        scale = 1
        if self.human_control:
            scale = 2
        if not self.native_rendering:
            self.renderer.create_screen(image.shape[1]*scale, image.shape[0]*scale)

    # measurements
    if self.env.spec is not None:
        self.timestep_limit = self.env.spec.timestep_limit
    else:
        self.timestep_limit = None

    # the info is only updated after the first step
    self.state = self.step(self.action_space.default_action).next_state
    self.state_space['measurements'] = VectorObservationSpace(shape=len(self.info.keys()))

    if self.env.spec and custom_reward_threshold is None:
        # no custom threshold given: take the success threshold from the Gym spec
        self.reward_success_threshold = self.env.spec.reward_threshold
        self.reward_space = RewardSpace(1, reward_success_threshold=self.reward_success_threshold)
def test_preset_n_and_ew_and_onnx(preset_args, clres, start_time=None,
                                  time_limit=Def.TimeOuts.test_time_limit):
    """
    Test command arguments - check evaluation worker, number of workers and onnx.

    :param preset_args: preset name to run
    :param clres: logs and csv files of the command line test
    :param start_time: test start time; defaults to the moment the test is
        called. (Previously the default was ``time.time()`` evaluated once at
        import time, which silently shortened the effective timeout.)
    :param time_limit: max time for the test
    """
    if start_time is None:
        start_time = time.time()

    ew_flag = ['-ew']
    n_flag = ['-n', Def.Flags.enw]
    onnx_flag = ['-onnx']
    s_flag = ['-s', Def.Flags.css]

    p_valid_params = p_utils.validation_params(preset_args)

    run_cmd = [
        'python3', 'rl_coach/coach.py',
        '-p', '{}'.format(preset_args),
        '-e', '{}'.format("ExpName_" + preset_args),
    ]

    # add flags to run command
    test_ew_flag = a_utils.add_one_flag_value(flag=ew_flag)
    test_n_flag = a_utils.add_one_flag_value(flag=n_flag)
    test_onnx_flag = a_utils.add_one_flag_value(flag=onnx_flag)
    test_s_flag = a_utils.add_one_flag_value(flag=s_flag)

    run_cmd.extend(test_ew_flag)
    run_cmd.extend(test_n_flag)
    run_cmd.extend(test_onnx_flag)
    run_cmd.extend(test_s_flag)
    print(str(run_cmd))

    proc = subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout)

    try:
        # Check csv files has been created
        a_utils.validate_arg_result(flag=test_ew_flag,
                                    p_valid_params=p_valid_params, clres=clres,
                                    process=proc, start_time=start_time,
                                    timeout=time_limit)

        # Check csv files created same as the number of the workers
        a_utils.validate_arg_result(flag=test_n_flag,
                                    p_valid_params=p_valid_params, clres=clres,
                                    process=proc, start_time=start_time,
                                    timeout=time_limit)

        # Check checkpoint files
        a_utils.validate_arg_result(flag=test_s_flag,
                                    p_valid_params=p_valid_params, clres=clres,
                                    process=proc, start_time=start_time,
                                    timeout=time_limit)

        # TODO: add onnx check; issue found #257
    except AssertionError:
        # close process once get assert false
        proc.kill()
        # if test failed - print logs
        screen.error(open(clres.stdout.name).read(), crash=False)
        assert False
    finally:
        # always reap the coach process - previously an unexpected (non-assert)
        # exception would leak it
        proc.kill()
# python rl_coach/utilities/carla_dataset_to_replay_buffer.py agent_params.memory.load_memory_from_file_path = "./datasets/carla_train_set_replay_buffer.p" agent_params.memory.state_key_with_the_class_index = 'high_level_command' agent_params.memory.num_classes = 4 # download dataset if it doesn't exist if not os.path.exists(agent_params.memory.load_memory_from_file_path): screen.log_title( "The CARLA dataset is not present in the following path: {}".format( agent_params.memory.load_memory_from_file_path)) result = screen.ask_yes_no("Do you want to download it now?") if result: create_dataset(None, "./datasets/carla_train_set_replay_buffer.p") else: screen.error( "Please update the path to the CARLA dataset in the CARLA_CIL preset", crash=True) ############### # Environment # ############### env_params = CarlaEnvironmentParameters() env_params.cameras = ['CameraRGB'] env_params.camera_height = 600 env_params.camera_width = 800 env_params.separate_actions_for_throttle_and_brake = True env_params.allow_braking = True env_params.quality = CarlaEnvironmentParameters.Quality.EPIC env_params.experiment_suite = CoRL2017('Town01') graph_manager = BasicRLGraphManager(agent_params=agent_params,
def test_restore_checkpoint(preset_args, clres, framework, timeout=Def.TimeOuts.test_time_limit):
    """
    Create checkpoints and restore them in second run.

    First run: launches coach with periodic checkpointing, waits for the reward
    to be reached and checkpoint files to appear, then stops it with SIGINT and
    copies the experiment folder aside. Second run: relaunches coach in
    evaluation mode restoring from the copied checkpoint folder and asserts on
    the resulting episode length.

    :param preset_args: all preset that can be tested for argument tests
    :param clres: logs and csv files
    :param framework: name of the test framework
    :param timeout: max time for test
    """
    def _create_cmd_and_run(flag):
        """
        Create default command with given flag and run it
        :param flag: name of the tested flag, this flag will be extended to the running command line
        :return: active process
        """
        run_cmd = [
            'python3', 'rl_coach/coach.py',
            '-p', '{}'.format(preset_args),
            '-e', '{}'.format("ExpName_" + preset_args),
            '--seed', '{}'.format(4),
            '-f', '{}'.format(framework),
        ]
        test_flag = a_utils.add_one_flag_value(flag=flag)
        run_cmd.extend(test_flag)
        print(str(run_cmd))
        p = subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout)
        return p

    start_time = time.time()

    if framework == "mxnet":
        # update preset name - for mxnet framework we are using *_DQN preset
        preset_args = Def.Presets.mxnet_args_test[0]
        # update logs paths
        test_name = 'ExpName_{}'.format(preset_args)
        test_path = os.path.join(Def.Path.experiments, test_name)
        clres.experiment_path = test_path
        clres.stdout_path = 'test_log_{}.txt'.format(preset_args)

    p_valid_params = p_utils.validation_params(preset_args)
    # first run: save a checkpoint every 5 seconds
    create_cp_proc = _create_cmd_and_run(flag=['--checkpoint_save_secs', '5'])

    # wait for checkpoint files
    csv_list = a_utils.get_csv_path(clres=clres)
    assert len(csv_list) > 0

    exp_dir = os.path.dirname(csv_list[0])
    checkpoint_dir = os.path.join(exp_dir, Def.Path.checkpoint)
    checkpoint_test_dir = os.path.join(Def.Path.experiments, Def.Path.test_dir)
    if os.path.exists(checkpoint_test_dir):
        # clean leftovers from a previous run of this test
        shutil.rmtree(checkpoint_test_dir)

    res = a_utils.is_reward_reached(csv_path=csv_list[0], p_valid_params=p_valid_params,
                                    start_time=start_time, time_limit=timeout)
    if not res:
        # reward was not reached in time - dump the coach log and fail
        screen.error(open(clres.stdout.name).read(), crash=False)
        assert False

    entities = a_utils.get_files_from_dir(checkpoint_dir)
    assert len(entities) > 0
    assert any(".ckpt." in file for file in entities)

    # send CTRL+C to close experiment
    create_cp_proc.send_signal(signal.SIGINT)

    if os.path.isdir(checkpoint_dir):
        # preserve the experiment (incl. checkpoints) before removing it
        shutil.copytree(exp_dir, checkpoint_test_dir)
        shutil.rmtree(exp_dir)

    create_cp_proc.kill()
    checkpoint_test_dir = "{}/{}".format(checkpoint_test_dir, Def.Path.checkpoint)

    # run second time with checkpoint folder (restore)
    restore_cp_proc = _create_cmd_and_run(flag=['-crd', checkpoint_test_dir, '--evaluate'])

    new_csv_list = test_utils.get_csv_path(clres=clres)
    # give the evaluation run some time to write results
    time.sleep(10)

    csv = pd.read_csv(new_csv_list[0])
    res = csv['Episode Length'].values[-1]
    expected_reward = 100
    assert res >= expected_reward, Def.Consts.ASSERT_MSG.format(str(expected_reward), str(res))
    restore_cp_proc.kill()

    test_folder = os.path.join(Def.Path.experiments, Def.Path.test_dir)
    if os.path.exists(test_folder):
        shutil.rmtree(test_folder)
def __init__(self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters,
             target_success_rate: float = 1.0, additional_simulator_parameters: Dict[str, Any] = None,
             seed: Union[None, int] = None, human_control: bool = False,
             custom_reward_threshold: Union[int, float] = None, random_initialization_steps: int = 1,
             max_over_num_frames: int = 1, observation_space_type: ObservationSpaceType = None, **kwargs):
    """
    :param level: (str) A string representing the gym level to run. This can also be a LevelSelection object.
                  For example, BreakoutDeterministic-v0

    :param frame_skip: (int) The number of frames to skip between any two actions given by the agent. The action
                       will be repeated for all the skipped frames.

    :param visualization_parameters: (VisualizationParameters) The parameters used for visualizing the environment,
                                     such as the render flag, storing videos etc.

    :param additional_simulator_parameters: (Dict[str, Any]) Any additional parameters that the user can pass to
                       the Gym environment. These parameters should be accepted by the __init__ function of the
                       implemented Gym environment. Defaults to an empty dict (a None default is used
                       internally to avoid a shared mutable default argument).

    :param seed: (int) A seed to use for the random number generator when running the environment.

    :param human_control: (bool) A flag that allows controlling the environment using the keyboard keys.

    :param custom_reward_threshold: (float) Allows defining a custom reward that will be used to decide when
                                    the agent succeeded in passing the environment. If not set, this value will
                                    be taken from the Gym environment definition.

    :param random_initialization_steps: (int) The number of random steps that will be taken in the environment
                                        after each reset. This is a feature presented in the DQN paper, which
                                        improves the variability of the episodes the agent sees.

    :param max_over_num_frames: (int) This value will be used for merging multiple frames into a single frame
                                by taking the maximum value for each of the pixels in the frame. This is
                                particularly used in Atari games, where the frames flicker, and objects can be
                                seen in one frame but disappear in the next.

    :param observation_space_type: This value will be used for generating observation space. Allows a custom space.
                                   Should be one of ObservationSpaceType. If not specified, observation space is
                                   inferred from the number of dimensions of the observation: 1D: Vector space,
                                   3D: Image space if 1 or 3 channels, PlanarMaps space otherwise.
    """
    super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold,
                     visualization_parameters, target_success_rate)

    self.random_initialization_steps = random_initialization_steps
    self.max_over_num_frames = max_over_num_frames
    # fixed mutable-default-argument bug: the default used to be a shared {} literal
    self.additional_simulator_parameters = \
        additional_simulator_parameters if additional_simulator_parameters is not None else {}

    # hide warnings
    gym.logger.set_level(40)

    """
    load and initialize environment
    environment ids can be defined in 3 ways:
    1. Native gym environments like BreakoutDeterministic-v0 for example
    2. Custom gym environments written and installed as python packages.
       This environments should have a python module with a class inheriting gym.Env, implementing the
       relevant functions (_reset, _step, _render) and defining the observation and action space
       For example: my_environment_package:MyEnvironmentClass will run an environment defined in the
       MyEnvironmentClass class
    3. Custom gym environments written as an independent module which is not installed.
       This environments should have a python module with a class inheriting gym.Env, implementing the
       relevant functions (_reset, _step, _render) and defining the observation and action space.
       For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an
       environment defined in the MyEnvironmentClass class which is located in the module in the
       relative path path_to_my_environment.sub_directory.my_module
    """
    if ':' in self.env_id:
        # custom environments
        if '/' in self.env_id or '.' in self.env_id:
            # environment in an absolute path module written as a unix path or in a relative path module
            # written as a python import path
            env_class = short_dynamic_import(self.env_id)
        else:
            # environment in a python package
            env_class = gym.envs.registration.load(self.env_id)

        # instantiate the environment
        try:
            self.env = env_class(**self.additional_simulator_parameters)
        except:
            screen.error(
                "Failed to instantiate Gym environment class %s with arguments %s" %
                (env_class, self.additional_simulator_parameters),
                crash=False)
            raise
    else:
        self.env = gym.make(self.env_id)

    # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows
    environment_to_always_use_with_native_rendering = [
        'classic_control', 'mujoco', 'robotics'
    ]
    self.native_rendering = self.native_rendering or \
                            any([env in str(self.env.unwrapped.__class__)
                                 for env in environment_to_always_use_with_native_rendering])
    if self.native_rendering:
        if hasattr(self, 'renderer'):
            # close the renderer the base class may have opened, to avoid a duplicate window
            self.renderer.close()

    # seed
    if self.seed is not None:
        self.env.seed(self.seed)
        np.random.seed(self.seed)
        random.seed(self.seed)

    # frame skip and max between consecutive frames
    self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__)
    self.is_roboschool_env = 'roboschool' in str(
        self.env.unwrapped.__class__)
    self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__)
    if self.is_atari_env:
        self.env.unwrapped.frameskip = 1  # this accesses the atari env that is wrapped with a timelimit wrapper env
        if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4:
            screen.warning(
                "Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. "
                "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the "
                "laser rays disappear. To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4."
            )
            self.frame_skip = 3
        # frameskip/max handled by the wrapper for Atari (flicker removal)
        self.env = MaxOverFramesAndFrameskipEnvWrapper(
            self.env,
            frameskip=self.frame_skip,
            max_over_num_frames=self.max_over_num_frames)
    else:
        self.env.unwrapped.frameskip = self.frame_skip

    self.state_space = StateSpace({})

    # observations: normalize to a dict of observation spaces
    if not isinstance(self.env.observation_space, gym.spaces.dict.Dict):
        state_space = {'observation': self.env.observation_space}
    else:
        state_space = self.env.observation_space.spaces

    for observation_space_name, observation_space in state_space.items():
        if observation_space_type == ObservationSpaceType.Tensor:
            # we consider arbitrary input tensor which does not necessarily represent images
            self.state_space[
                observation_space_name] = TensorObservationSpace(
                    shape=np.array(observation_space.shape),
                    low=observation_space.low,
                    high=observation_space.high)
        elif observation_space_type == ObservationSpaceType.Image or len(
                observation_space.shape) == 3:
            # we assume gym has image observations (with arbitrary number of channels) where their values are
            # within 0-255, and where the channel dimension is the last dimension
            if observation_space.shape[-1] in [1, 3]:
                self.state_space[
                    observation_space_name] = ImageObservationSpace(
                        shape=np.array(observation_space.shape),
                        high=255,
                        channels_axis=-1)
            else:
                # For any number of channels other than 1 or 3, use the generic PlanarMaps space
                self.state_space[
                    observation_space_name] = PlanarMapsObservationSpace(
                        shape=np.array(observation_space.shape),
                        low=0,
                        high=255,
                        channels_axis=-1)
        elif observation_space_type == ObservationSpaceType.Vector or len(
                observation_space.shape) == 1:
            self.state_space[
                observation_space_name] = VectorObservationSpace(
                    shape=observation_space.shape[0],
                    low=observation_space.low,
                    high=observation_space.high)
        else:
            # screen.error with crash=True terminates the process; the previous
            # 'raise screen.error(...)' would have been a 'raise None' (TypeError)
            # had it ever executed, and is inconsistent with the other branches
            screen.error(
                "Failed to instantiate Gym environment class %s with observation space type %s"
                % (env_class, observation_space_type),
                crash=True)

    if 'desired_goal' in state_space.keys():
        # goal-based (e.g. robotics-style dict) observation space
        self.goal_space = self.state_space['desired_goal']

    # actions
    if type(self.env.action_space) == gym.spaces.box.Box:
        self.action_space = BoxActionSpace(
            shape=self.env.action_space.shape,
            low=self.env.action_space.low,
            high=self.env.action_space.high)
    elif type(self.env.action_space) == gym.spaces.discrete.Discrete:
        actions_description = []
        if hasattr(self.env.unwrapped, 'get_action_meanings'):
            actions_description = self.env.unwrapped.get_action_meanings()
        self.action_space = DiscreteActionSpace(
            num_actions=self.env.action_space.n,
            descriptions=actions_description)
    else:
        # same fix as above: drop the bogus 'raise' around a crashing screen.error call
        screen.error((
            "Failed to instantiate gym environment class {} due to unsupported "
            "action space {}. Expected BoxActionSpace or DiscreteActionSpace."
        ).format(env_class, self.env.action_space),
                     crash=True)

    if self.human_control:
        # TODO: add this to the action space
        # map keyboard keys to actions
        self.key_to_action = {}
        if hasattr(self.env.unwrapped, 'get_keys_to_action'):
            self.key_to_action = self.env.unwrapped.get_keys_to_action()
        else:
            screen.error(
                "Error: Environment {} does not support human control.".
                format(self.env),
                crash=True)

    # initialize the state by getting a new state from the environment
    self.reset_internal_state(True)

    # render
    if self.is_rendered:
        image = self.get_rendered_image()
        scale = 1
        if self.human_control:
            scale = 2
        if not self.native_rendering:
            self.renderer.create_screen(image.shape[1] * scale,
                                        image.shape[0] * scale)

    # the info is only updated after the first step
    self.state = self.step(self.action_space.default_action).next_state
    self.state_space['measurements'] = VectorObservationSpace(
        shape=len(self.info.keys()))

    if self.env.spec and custom_reward_threshold is None:
        # no custom threshold given: take the success threshold from the Gym spec
        self.reward_success_threshold = self.env.spec.reward_threshold
        self.reward_space = RewardSpace(
            1, reward_success_threshold=self.reward_success_threshold)

    self.target_success_rate = target_success_rate
def test_preset_reward(preset_name, no_progress_bar=True, time_limit=60 * 60, verbose=True):
    """
    Golden reward test for a single preset.

    Launches the preset as a coach subprocess, polls its csv output until the
    windowed-average evaluation reward reaches the preset's min_reward_threshold,
    the episode budget is exhausted, or the time limit expires. Raises
    ValueError if the test does not pass.

    :param preset_name: name of the preset to run
    :param no_progress_bar: when True, suppress the progress printout
    :param time_limit: max wall-clock seconds to wait for the reward
    :param verbose: when True, dump the subprocess log on failure
    :raises ValueError: if the golden test failed
    """
    preset_validation_params = validation_params(preset_name)

    # window size (in data points) for the moving-average reward
    win_size = 10

    test_name = '__test_reward_{}'.format(preset_name)
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        # clean leftovers from a previous run
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)

    cmd = [
        'python3', 'rl_coach/coach.py',
        '-p', '{preset_name}'.format(preset_name=preset_name),
        '-e', '{test_name}'.format(test_name=test_name),
        '-n', '{num_workers}'.format(num_workers=preset_validation_params.num_workers),
        '--seed', '0',
        '-c'
    ]
    if preset_validation_params.reward_test_level:
        cmd += ['-lvl', '{level}'.format(level=preset_validation_params.reward_test_level)]

    stdout = open(log_file_name, 'w')

    p = subprocess.Popen(cmd, stdout=stdout, stderr=stdout)

    start_time = time.time()

    reward_str = 'Evaluation Reward'

    # with multiple workers, only worker 0 writes the evaluation csv
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, time_limit)

        # poll until reward reached, episode budget exhausted, or timeout
        while csv is None or (csv['Episode #'].values[-1]
                              < preset_validation_params.max_episodes_to_achieve_reward
                              and time.time() - start_time < time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                # (deliberate best-effort retry; the loop condition bounds it by time_limit)
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                # moving average over up to win_size evaluation rewards
                averaged_rewards = np.convolve(rewards,
                                               np.ones(min(len(rewards), win_size)) / win_size,
                                               mode='valid')
            else:
                time.sleep(1)
                continue

            if not no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, time_limit)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                # no new episodes since the last check - keep polling
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if reward is enough
            if np.any(averaged_rewards >= preset_validation_params.min_reward_threshold):
                test_passed = True
                break
            time.sleep(1)

    # kill test and print result
    # os.killpg(os.getpgid(p.pid), signal.SIGKILL)
    p.kill()
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error("preset_validation_params.max_episodes_to_achieve_reward: {}".format(
                preset_validation_params.max_episodes_to_achieve_reward), crash=False)
            screen.error("preset_validation_params.min_reward_threshold: {}".format(
                preset_validation_params.min_reward_threshold), crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False)
            screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)

    if not test_passed:
        raise ValueError('golden test failed')
def get_config_args(self, parser: argparse.ArgumentParser) -> argparse.Namespace:
    """
    Returns a Namespace object with all the user-specified configuration options needed to launch.
    This implementation uses argparse to take arguments from the CLI, but this can be over-ridden by
    another method that gets its configuration from elsewhere. An equivalent method however must
    return an identically structured Namespace object, which conforms to the structure defined by
    get_argument_parser.

    This method parses the arguments that the user entered, does some basic validation, and
    modification of user-specified values in short form to be more explicit.

    :param parser: a parser object which implicitly defines the format of the Namespace that
                   is expected to be returned.
    :return: the parsed arguments as a Namespace
    """
    args = parser.parse_args()

    if args.nocolor:
        screen.set_use_colors(False)

    # if no arg is given
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # list available presets
    if args.list:
        self.display_all_presets_and_exit()

    # Read args from config file for distributed Coach.
    # The dict passed to ConfigParser supplies defaults for keys missing from the file.
    if args.distributed_coach and args.distributed_coach_run_type == RunType.ORCHESTRATOR:
        coach_config = ConfigParser({
            'image': '',
            'memory_backend': 'redispubsub',
            'data_store': 's3',
            's3_end_point': 's3.amazonaws.com',
            's3_bucket_name': '',
            's3_creds_file': ''
        })
        try:
            coach_config.read(args.distributed_coach_config_path)
            args.image = coach_config.get('coach', 'image')
            args.memory_backend = coach_config.get('coach', 'memory_backend')
            args.data_store = coach_config.get('coach', 'data_store')
            # s3-specific settings are only read when the selected data store is s3
            if args.data_store == 's3':
                args.s3_end_point = coach_config.get(
                    'coach', 's3_end_point')
                args.s3_bucket_name = coach_config.get(
                    'coach', 's3_bucket_name')
                args.s3_creds_file = coach_config.get(
                    'coach', 's3_creds_file')
        except Error as e:
            # Error here is the configparser exception base class (file unreadable / bad section)
            screen.error(
                "Error when reading distributed Coach config file: {}".
                format(e))

        # NOTE(review): the validations below assume screen.error() aborts the run,
        # since execution would otherwise continue with invalid settings — confirm.
        if args.image == '':
            screen.error("Image cannot be empty.")

        data_store_choices = ['s3', 'nfs']
        if args.data_store not in data_store_choices:
            screen.warning("{} data store is unsupported.".format(
                args.data_store))
            screen.error(
                "Supported data stores are {}.".format(data_store_choices))

        memory_backend_choices = ['redispubsub']
        if args.memory_backend not in memory_backend_choices:
            screen.warning("{} memory backend is not supported.".format(
                args.memory_backend))
            screen.error("Supported memory backends are {}.".format(
                memory_backend_choices))

        if args.data_store == 's3':
            if args.s3_bucket_name == '':
                screen.error("S3 bucket name cannot be empty.")
            # empty creds path is normalized to None (use default credential lookup downstream)
            if args.s3_creds_file == '':
                args.s3_creds_file = None

    if args.play and args.distributed_coach:
        screen.error("Playing is not supported in distributed Coach.")

    # replace a short preset name with the full path
    if args.preset is not None:
        args.preset = self.expand_preset(args.preset)

    # validate the checkpoints args
    if args.checkpoint_restore_dir is not None and not os.path.exists(
            args.checkpoint_restore_dir):
        screen.error(
            "The requested checkpoint folder to load from does not exist.")

    # validate the checkpoints args
    # glob with a trailing '*' matches checkpoint file prefixes (e.g. TF's .index/.meta suffixes)
    if args.checkpoint_restore_file is not None and not glob(
            args.checkpoint_restore_file + '*'):
        screen.error(
            "The requested checkpoint file to load from does not exist.")

    # no preset was given. check if the user requested to play some environment on its own
    if args.preset is None and args.play and not args.environment_type:
        screen.error(
            'When no preset is given for Coach to run, and the user requests human control over '
            'the environment, the user is expected to input the desired environment_type and level.'
            '\nAt least one of these parameters was not given.')
    elif args.preset and args.play:
        screen.error(
            "Both the --preset and the --play flags were set. These flags can not be used together. "
            "For human control, please use the --play flag together with the environment type flag (-et)"
        )
    elif args.preset is None and not args.play:
        screen.error(
            "Please choose a preset using the -p flag or use the --play flag together with choosing an "
            "environment type (-et) in order to play the game.")

    # get experiment name and path
    args.experiment_name = logger.get_experiment_name(args.experiment_name)
    args.experiment_path = logger.get_experiment_path(args.experiment_name)

    # human play is single-worker only; silently downgrade with a warning
    if args.play and args.num_workers > 1:
        screen.warning(
            "Playing the game as a human is only available with a single worker. "
            "The number of workers will be reduced to 1")
        args.num_workers = 1

    # convert the short framework name (e.g. "tensorflow") to the Frameworks enum member
    args.framework = Frameworks[args.framework.lower()]

    # checkpoints
    # checkpoint saving is only enabled when a save interval was requested
    args.checkpoint_save_dir = os.path.join(
        args.experiment_path,
        'checkpoint') if args.checkpoint_save_secs is not None else None

    if args.export_onnx_graph and not args.checkpoint_save_secs:
        screen.warning(
            "Exporting ONNX graphs requires setting the --checkpoint_save_secs flag. "
            "The --export_onnx_graph will have no effect.")

    return args
def main():
    """Run trace-based regression tests over the selected rl_coach presets.

    Parses CLI options, collects the preset list (a single preset via -p, or
    every module under rl_coach/presets minus those ignored via -ip), launches
    a trace test per preset (one per level when the preset declares
    trace_test_levels), then drains the outstanding processes and prints a
    pass/fail summary via `screen`.

    Side effects: appends to the module-level `processes` list and spawns
    subprocesses through run_trace_based_test / wait_and_check.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p', '--preset',
        help="(string) Name of a preset to run (as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-ip', '--ignore_presets',
        help="(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-v', '--verbose',
        help="(flag) display verbose logs in the event of an error",
        action='store_true')
    parser.add_argument(
        '--stop_after_first_failure',
        help="(flag) stop executing tests after the first error",
        action='store_true')
    parser.add_argument(
        '-ow', '--overwrite',
        help="(flag) overwrite old trace with new ones in trace testing mode",
        action='store_true')
    parser.add_argument(
        '-prl', '--parallel',
        help="(flag) run tests in parallel",
        action='store_true')
    parser.add_argument(
        '-mt', '--max_threads',
        help="(int) maximum number of threads to run in parallel",
        default=multiprocessing.cpu_count() - 2,
        type=int)
    args = parser.parse_args()
    # sequential mode: cap the worker pool at a single thread
    if not args.parallel:
        args.max_threads = 1

    if args.preset is not None:
        presets_lists = [args.preset]
    else:
        # every .py module in rl_coach/presets is a candidate preset
        presets_lists = [
            f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
            if f.endswith('.py') and f != '__init__.py'
        ]

    fail_count = 0
    test_count = 0

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []
    for preset_name in sorted(presets_lists):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module(
                    'rl_coach.presets.{}'.format(preset_name))
            # was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit
            # and made the sweep impossible to abort mid-import; a broken preset is
            # still counted as a failed test rather than crashing the whole run.
            except Exception:
                screen.error("Failed to load preset <{}>".format(preset_name),
                             crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            num_env_steps = preset_validation_params.trace_max_env_steps
            if preset_validation_params.test_using_a_trace_test:
                if preset_validation_params.trace_test_levels:
                    # one trace test per declared level
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_path, log_file, p = run_trace_based_test(
                            preset_name, num_env_steps, level)
                        processes.append((test_path, log_file, p))
                        # wait_and_check returns None while under the thread cap,
                        # or the pass/fail result of a completed test
                        test_passed = wait_and_check(args, processes)
                        if test_passed is not None and not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_path, log_file, p = run_trace_based_test(
                        preset_name, num_env_steps)
                    processes.append((test_path, log_file, p))
                    test_passed = wait_and_check(args, processes)
                    if test_passed is not None and not test_passed:
                        fail_count += 1

    # drain any tests still in flight
    while processes:
        test_passed = wait_and_check(args, processes, force=True)
        if test_passed is not None and not test_passed:
            fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" +
                       str(test_count) + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" +
                     str(test_count) + " tests passed successfully")