def evaluate(self, steps: PlayingStepsType, reset_before_eval: bool = True) -> bool:
    """Perform evaluation for several steps.

    Args:
        steps (PlayingStepsType): the number of steps as a tuple of steps time and steps count
        reset_before_eval (bool): whether to reset the environment before evaluating

    Returns:
        bool: True if the target reward and target success have been reached
    """
    self.verify_graph_was_created()

    if steps.num_steps > 0:
        with self.phase_context(RunPhase.TEST):
            # reset all the levels before starting to evaluate
            if reset_before_eval:
                self.reset_internal_state(force_environment_reset=True)
            self.sync()

            # act for at least `steps`, though don't interrupt an episode
            count_end = self.current_step_counter + steps
            while self.current_step_counter < count_end:
                self.act(EnvironmentEpisodes(1))
                self.sync()

    if self.should_stop():
        self.flush_finished()
        screen.success("Reached required success rate. Exiting.")
        return True
    return False
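# `flush_finished()` is called above but not shown in this section; a minimal sketch of what it
# might do, assuming it factors out the logic that the older variant below still inlines
# (writing the FINISHED sync-file and saving to the data store) -- names taken from that variant:
def flush_finished(self):
    # signal other workers / the orchestrator that this run is done
    if self.task_parameters.checkpoint_save_dir:
        open(os.path.join(self.task_parameters.checkpoint_save_dir,
                          SyncFiles.FINISHED.value), 'w').close()

    # persist any pending artifacts to the external data store, if one is configured
    if hasattr(self, 'data_store_params'):
        data_store = self.get_data_store(self.data_store_params)
        data_store.save_to_store()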
def evaluate(self, steps: PlayingStepsType, keep_networks_in_sync: bool = False) -> bool:
    """
    Perform evaluation for several steps

    :param steps: the number of steps as a tuple of steps time and steps count
    :param keep_networks_in_sync: sync the network parameters with the global network before each episode
    :return: bool, True if the target reward and target success has been reached
    """
    self.verify_graph_was_created()

    if steps.num_steps > 0:
        with self.phase_context(RunPhase.TEST):
            # reset all the levels before starting to evaluate
            self.reset_internal_state(force_environment_reset=True)
            self.sync()

            # act for at least `steps`, though don't interrupt an episode
            count_end = self.current_step_counter + steps
            while self.current_step_counter < count_end:
                self.act(EnvironmentEpisodes(1))
                self.sync()

    if self.should_stop():
        if self.task_parameters.checkpoint_save_dir:
            open(os.path.join(self.task_parameters.checkpoint_save_dir,
                              SyncFiles.FINISHED.value), 'w').close()

        if hasattr(self, 'data_store_params'):
            data_store = self.get_data_store(self.data_store_params)
            data_store.save_to_store()

        screen.success("Reached required success rate. Exiting.")
        return True
    return False
def evaluate(self, steps: PlayingStepsType) -> bool:
    """
    Perform evaluation for several steps

    :param steps: the number of steps as a tuple of steps time and steps count
    :return: bool, True if the target reward and target success has been reached
    """
    import smdebug.tensorflow as smd
    self.smdebug_hook.set_mode(smd.modes.EVAL)

    self.verify_graph_was_created()

    if steps.num_steps > 0:
        with self.phase_context(RunPhase.TEST):
            # reset all the levels before starting to evaluate
            self.reset_internal_state(force_environment_reset=True)
            self.sync()

            # act for at least `steps`, though don't interrupt an episode
            count_end = self.current_step_counter + steps
            while self.current_step_counter < count_end:
                self.act(EnvironmentEpisodes(1))
                self.sync()

    if self.should_stop():
        self.flush_finished()
        screen.success("Reached required success rate. Exiting.")
        return True
    return False
def evaluate(self, steps: PlayingStepsType) -> bool:
    """
    Perform evaluation for several steps

    :param steps: the number of steps as a tuple of steps time and steps count
    :return: bool, True if the target reward and target success has been reached
    """
    self.verify_graph_was_created()

    if steps.num_steps > 0:
        with self.phase_context(RunPhase.TEST):
            # reset all the levels before starting to evaluate
            self.reset_internal_state(force_environment_reset=True)
            self.sync()

            # act for at least `steps`, though don't interrupt an episode
            count_end = self.current_step_counter + steps
            while self.current_step_counter < count_end:
                # In case of an evaluation-only worker, fake a phase transition before and after every
                # episode to make sure results are logged correctly
                if self.task_parameters.evaluate_only is not None:
                    self.phase = RunPhase.TEST
                self.act(EnvironmentEpisodes(1))
                self.sync()
                if self.task_parameters.evaluate_only is not None:
                    self.phase = RunPhase.TRAIN

    if self.should_stop():
        self.flush_finished()
        screen.success("Reached required success rate. Exiting.")
        return True
    return False
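# A minimal usage sketch for the `evaluate()` variants above, assuming the standard rl_coach
# preset / graph-manager flow (the preset name and experiment path here are examples only):
from rl_coach.base_parameters import TaskParameters
from rl_coach.core_types import EnvironmentSteps
from rl_coach.presets.CartPole_DQN import graph_manager

# build the graph for this task, then run a pure evaluation phase for ~1000 environment steps;
# `evaluate()` returns True once the preset's target reward / success rate has been reached
graph_manager.create_graph(TaskParameters(experiment_path='./experiments/eval_demo'))
reached_target = graph_manager.evaluate(EnvironmentSteps(1000))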
def test_preset_runs(preset):
    test_failed = False
    print("Testing preset {}".format(preset))

    # TODO: this is a temporary workaround for presets which define more than a single available level.
    #       we should probably do this in a more robust way
    level = ""
    if "Atari" in preset:
        level = "breakout"
    elif "Mujoco" in preset:
        level = "inverted_pendulum"
    elif "ControlSuite" in preset:
        level = "pendulum:swingup"

    experiment_name = ".test-" + preset

    # override heatup to a small number of steps (1000), so the heatup stage finishes and training starts
    params = [
        "python3", "rl_coach/coach.py",
        "-p", preset,
        "-ns",
        "-e", experiment_name,
        "-cp", "heatup_steps=EnvironmentSteps(1000)"
    ]
    if level != "":
        params += ["-lvl", level]
    p = Popen(params)

    # wait 30 seconds to cover initialization overhead and finishing heatup
    time.sleep(30)
    return_value = p.poll()
    if return_value is None:
        screen.success("{} passed successfully".format(preset))
    else:
        test_failed = True
        screen.error("{} failed".format(preset), crash=False)
    p.kill()
    if os.path.exists("experiments/" + experiment_name):
        shutil.rmtree("experiments/" + experiment_name)

    assert not test_failed
def test_preset_runs(preset):
    test_failed = False
    print("Testing preset {}".format(preset))

    # TODO: this is a temporary workaround for presets which define more than a single available level.
    #       we should probably do this in a more robust way
    level = ""
    if "Atari" in preset:
        level = "breakout"
    elif "Mujoco" in preset:
        level = "inverted_pendulum"
    elif "ControlSuite" in preset:
        level = "pendulum:swingup"
    elif 'Lab' in preset:
        level = 'nav_maze_static_01'

    experiment_name = ".test-" + preset

    params = [
        sys.executable, "rl_coach/coach.py",
        "-p", preset,
        "-ns",
        "-e", experiment_name
    ]
    if level != "":
        params += ["-lvl", level]
    p = Popen(params)

    # wait 10 seconds overhead of initialization etc.
    time.sleep(10)
    return_value = p.poll()
    if return_value is None:
        screen.success("{} passed successfully".format(preset))
    else:
        test_failed = True
        screen.error("{} failed".format(preset), crash=False)
    p.kill()
    if os.path.exists("experiments/" + experiment_name):
        shutil.rmtree("experiments/" + experiment_name)

    assert not test_failed
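# The two `test_preset_runs` variants above expect `preset` to be injected by pytest; a plausible
# parametrized fixture that would drive them, reusing the preset-directory listing used elsewhere
# in this section -- illustrative, not necessarily the repository's actual conftest:
import pytest

@pytest.fixture(params=sorted(f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
                              if f.endswith('.py') and f != '__init__.py'))
def preset(request):
    return request.param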
def test_all_presets_are_running():
    # os.chdir("../../")
    test_failed = False
    all_presets = sorted([f.split('.')[0] for f in os.listdir('rl_coach/presets')
                          if f.endswith('.py') and f != '__init__.py'])
    for preset in all_presets:
        print("Testing preset {}".format(preset))

        # TODO: this is a temporary workaround for presets which define more than a single available level.
        #       we should probably do this in a more robust way
        level = ""
        if "Atari" in preset:
            level = "breakout"
        elif "Mujoco" in preset:
            level = "inverted_pendulum"
        elif "ControlSuite" in preset:
            level = "pendulum:swingup"

        params = ["python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e", ".test"]
        if level != "":
            params += ["-lvl", level]
        p = Popen(params, stdout=DEVNULL)

        # wait 10 seconds overhead of initialization etc.
        time.sleep(10)
        return_value = p.poll()
        if return_value is None:
            screen.success("{} passed successfully".format(preset))
        else:
            test_failed = True
            screen.error("{} failed".format(preset), crash=False)
        p.kill()
        if os.path.exists("experiments/.test"):
            shutil.rmtree("experiments/.test")

    assert not test_failed
def perform_reward_based_tests(args, preset_validation_params, preset_name):
    win_size = 10

    test_name = '__test_reward'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
    cmd = (
        'python3 rl_coach/coach.py '
        '-p {preset_name} '
        '-e {test_name} '
        '-n {num_workers} '
        '--seed 0 '
        '-c '
        '{level} '
        '&> {log_file_name} '
    ).format(
        preset_name=preset_name,
        test_name=test_name,
        num_workers=preset_validation_params.num_workers,
        log_file_name=log_file_name,
        level='-lvl ' + preset_validation_params.reward_test_level
              if preset_validation_params.reward_test_level else ''
    )

    p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not args.no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args)

        while csv is None or (csv['Episode #'].values[-1]
                              < preset_validation_params.max_episodes_to_achieve_reward
                              and time.time() - start_time < args.time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                averaged_rewards = np.convolve(rewards, np.ones(min(len(rewards), win_size)) / win_size,
                                               mode='valid')
            else:
                time.sleep(1)
                continue

            if not args.no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if reward is enough
            if np.any(averaged_rewards >= preset_validation_params.min_reward_threshold):
                test_passed = True
                break

            time.sleep(1)

    # kill test and print result
    os.killpg(os.getpgid(p.pid), signal.SIGTERM)

    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > args.time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error("preset_validation_params.max_episodes_to_achieve_reward: {}".format(
                preset_validation_params.max_episodes_to_achieve_reward), crash=False)
            screen.error("preset_validation_params.min_reward_threshold: {}".format(
                preset_validation_params.min_reward_threshold), crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False)
            screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
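# `read_csv_paths(test_path, filename_pattern)` is called above but not defined in this section;
# a plausible sketch, assuming it globs for the worker's csv under the experiment directory and
# retries while the csv is still being created -- an illustration, not the verified helper:
def read_csv_paths(test_path, filename_pattern, read_csv_tries=100):
    csv_paths = []
    tries_counter = 0
    while not csv_paths:
        # coach writes its csvs into a timestamped subdirectory of the experiment path
        csv_paths = glob.glob(os.path.join(test_path, '*', filename_pattern))
        if tries_counter > read_csv_tries:
            break
        tries_counter += 1
        time.sleep(1)
    return csv_paths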
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--trace',
                        help="(flag) perform trace based testing",
                        action='store_true')
    parser.add_argument('-p', '--preset',
                        help="(string) Name of a preset to run (as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-ip', '--ignore_presets',
                        help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-v', '--verbose',
                        help="(flag) display verbose logs in the event of an error",
                        action='store_true')
    parser.add_argument('--stop_after_first_failure',
                        help="(flag) stop executing tests after the first error",
                        action='store_true')
    parser.add_argument('-tl', '--time_limit',
                        help="time limit for each test in minutes",
                        default=40,  # the time limit is set this high because DDPG is very slow - its tests are long
                        type=int)
    parser.add_argument('-np', '--no_progress_bar',
                        help="(flag) Don't print the progress bar (makes jenkins logs more readable)",
                        action='store_true')
    parser.add_argument('-ow', '--overwrite',
                        help="(flag) overwrite old trace with new ones in trace testing mode",
                        action='store_true')
    args = parser.parse_args()

    if args.preset is not None:
        presets_lists = [args.preset]
    else:
        # presets_lists = list_all_classes_in_module(presets)
        presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
                         if f[-3:] == '.py' and not f == '__init__.py']

    fail_count = 0
    test_count = 0

    args.time_limit = 60 * args.time_limit  # convert minutes to seconds

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []

    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module('rl_coach.presets.{}'.format(preset_name))
            except:
                screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            if not args.trace and not preset_validation_params.test:
                continue

            if args.trace:
                num_env_steps = preset_validation_params.trace_max_env_steps
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_passed = perform_trace_based_tests(args, preset_name, num_env_steps, level)
                        if not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_passed = perform_trace_based_tests(args, preset_name, num_env_steps)
                    if not test_passed:
                        fail_count += 1
            else:
                test_count += 1  # count reward based tests too, so the summary totals are correct
                test_passed = perform_reward_based_tests(args, preset_validation_params, preset_name)
                if not test_passed:
                    fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count)
                     + " tests passed successfully")
def perform_trace_based_tests(args, preset_name, num_env_steps, level=None):
    test_name = '__test_trace'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}{}".format(preset_name, ' - ' + level if level else ''))
    log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
    cmd = (
        'python3 rl_coach/coach.py '
        '-p {preset_name} '
        '-e {test_name} '
        '--seed 42 '
        '-c '
        '--no_summary '
        '-cp {custom_param} '
        '{level} '
        '&> {log_file_name} '
    ).format(
        preset_name=preset_name,
        test_name=test_name,
        log_file_name=log_file_name,
        level='-lvl ' + level if level else '',
        custom_param='\"improve_steps=EnvironmentSteps({n});'
                     'steps_between_evaluation_periods=EnvironmentSteps({n});'
                     'evaluation_steps=EnvironmentSteps(1);'
                     'heatup_steps=EnvironmentSteps(1024)\"'.format(n=num_env_steps)
    )

    p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode), crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join(
            './rl_coach', 'traces',
            preset_name + '_' + level.replace(':', '_') if level else preset_name, '')
        if not os.path.exists(trace_path):
            screen.log('No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(os.path.dirname(trace_path))
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
            trace_csv_path = trace_csv_path[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
                test_passed = True
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
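# `clean_df()` is referenced above but not shown; a minimal sketch, assuming its job is to drop
# run-dependent columns (e.g. wall-clock time) so that two csvs from identically-seeded runs
# compare equal -- the column name is an assumption based on coach's csv output:
def clean_df(df):
    if 'Wall-Clock Time' in df.keys():
        df.drop(['Wall-Clock Time'], axis=1, inplace=True)
    return df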
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--preset', '--presets',
                        help="(string) Name of preset(s) to run (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-ip', '--ignore_presets',
                        help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-v', '--verbose',
                        help="(flag) display verbose logs in the event of an error",
                        action='store_true')
    parser.add_argument('--stop_after_first_failure',
                        help="(flag) stop executing tests after the first error",
                        action='store_true')
    parser.add_argument('-tl', '--time_limit',
                        help="time limit for each test in minutes",
                        default=60,  # setting time limit to be so high due to DDPG being very slow - its tests are long
                        type=int)
    parser.add_argument('-np', '--no_progress_bar',
                        help="(flag) Don't print the progress bar (makes jenkins logs more readable)",
                        action='store_true')
    args = parser.parse_args()

    if args.preset is not None:
        presets_lists = args.preset.split(',')
    else:
        presets_lists = all_presets()

    fail_count = 0
    test_count = 0

    args.time_limit = 60 * args.time_limit

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []

    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            print("Attempting to run Preset: %s" % preset_name)
            if not importable(preset_name):
                screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
                fail_count += 1
                test_count += 1
                continue

            if not has_test_parameters(preset_name):
                continue

            test_count += 1
            try:
                test_preset_reward(preset_name, args.no_progress_bar, args.time_limit, args.verbose)
            except Exception as e:
                fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count)
                     + " tests passed successfully")
def test_preset_reward(preset_name, no_progress_bar=True, time_limit=60 * 60, verbose=True):
    preset_validation_params = validation_params(preset_name)

    win_size = 10

    test_name = '__test_reward_{}'.format(preset_name)
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)

    cmd = [
        'python3', 'rl_coach/coach.py',
        '-p', '{preset_name}'.format(preset_name=preset_name),
        '-e', '{test_name}'.format(test_name=test_name),
        '-n', '{num_workers}'.format(num_workers=preset_validation_params.num_workers),
        '--seed', '0',
        '-c'
    ]
    if preset_validation_params.reward_test_level:
        cmd += ['-lvl', '{level}'.format(level=preset_validation_params.reward_test_level)]

    stdout = open(log_file_name, 'w')
    p = subprocess.Popen(cmd, stdout=stdout, stderr=stdout)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, time_limit)

        while csv is None or (csv['Episode #'].values[-1]
                              < preset_validation_params.max_episodes_to_achieve_reward
                              and time.time() - start_time < time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                averaged_rewards = np.convolve(rewards, np.ones(min(len(rewards), win_size)) / win_size,
                                               mode='valid')
            else:
                time.sleep(1)
                continue

            if not no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time,
                               time_limit)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if reward is enough
            if np.any(averaged_rewards >= preset_validation_params.min_reward_threshold):
                test_passed = True
                break

            time.sleep(1)

    # kill test and print result
    # os.killpg(os.getpgid(p.pid), signal.SIGKILL)
    p.kill()
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error("preset_validation_params.max_episodes_to_achieve_reward: {}".format(
                preset_validation_params.max_episodes_to_achieve_reward), crash=False)
            screen.error("preset_validation_params.min_reward_threshold: {}".format(
                preset_validation_params.min_reward_threshold), crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False)
            screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    if not test_passed:
        raise ValueError('golden test failed')
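# `print_progress()` is called above but not defined in this section; note the two call sites pass
# different final arguments (`args` in the older variant, `time_limit` here). A sketch of the
# latter signature, assuming it renders a single in-place status line -- purely illustrative:
def print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, time_limit):
    max_episodes = preset_validation_params.max_episodes_to_achieve_reward
    percentage = int(100 * last_num_episodes / max_episodes)
    sys.stdout.write("\rReward: ({}/{})".format(
        round(averaged_rewards[-1], 1), preset_validation_params.min_reward_threshold))
    sys.stdout.write(' Time (sec): ({}/{})'.format(round(time.time() - start_time, 2), time_limit))
    sys.stdout.write(' Episode: ({}/{})'.format(last_num_episodes, max_episodes))
    sys.stdout.write(' {}%|{}{}|  '.format(percentage, '#' * (percentage // 10),
                                           ' ' * (10 - percentage // 10)))
    sys.stdout.flush()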
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--preset', '--presets',
                        help="(string) Name of preset(s) to run (comma separated, as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-ip', '--ignore_presets',
                        help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-v', '--verbose',
                        help="(flag) display verbose logs in the event of an error",
                        action='store_true')
    parser.add_argument('--stop_after_first_failure',
                        help="(flag) stop executing tests after the first error",
                        action='store_true')
    parser.add_argument('-ow', '--overwrite',
                        help="(flag) overwrite old trace with new ones in trace testing mode",
                        action='store_true')
    parser.add_argument('-prl', '--parallel',
                        help="(flag) run tests in parallel",
                        action='store_true')
    parser.add_argument('-ut', '--update_traces',
                        help="(flag) update traces on repository",
                        action='store_true')
    parser.add_argument('-mt', '--max_threads',
                        help="(int) maximum number of threads to run in parallel",
                        default=multiprocessing.cpu_count() - 2,
                        type=int)
    parser.add_argument('-i', '--image',
                        help="(string) Name of the testing image",
                        type=str,
                        default=None)
    parser.add_argument('-mb', '--memory_backend',
                        help="(string) Name of the memory backend",
                        type=str,
                        default="redispubsub")
    parser.add_argument('-e', '--endpoint',
                        help="(string) Name of the s3 endpoint",
                        type=str,
                        default='s3.amazonaws.com')
    parser.add_argument('-cr', '--creds_file',
                        help="(string) Path of the s3 creds file",
                        type=str,
                        default='.aws_creds')
    parser.add_argument('-b', '--bucket',
                        help="(string) Name of the bucket for s3",
                        type=str,
                        default=None)

    args = parser.parse_args()
    if args.update_traces:
        if not args.bucket:
            print("bucket_name required for s3")
            exit(1)
        if not os.environ.get('AWS_ACCESS_KEY_ID') or not os.environ.get('AWS_SECRET_ACCESS_KEY'):
            print("AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env vars need to be set")
            exit(1)
        config_file = './tmp.cred'
        generate_config(args.image, args.memory_backend, args.endpoint, args.bucket, args.creds_file, config_file)

    if not args.parallel:
        args.max_threads = 1

    if args.preset is not None:
        presets_lists = args.preset.split(',')
    else:
        presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
                         if f[-3:] == '.py' and not f == '__init__.py']

    fail_count = 0
    test_count = 0

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []

    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module('rl_coach.presets.{}'.format(preset_name))
            except:
                screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            num_env_steps = preset_validation_params.trace_max_env_steps
            if preset_validation_params.test_using_a_trace_test:
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_path, log_file, p = run_trace_based_test(preset_name, num_env_steps, level)
                        processes.append((test_path, log_file, p))
                        test_passed = wait_and_check(args, processes)
                        if test_passed is not None and not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_path, log_file, p = run_trace_based_test(preset_name, num_env_steps)
                    processes.append((test_path, log_file, p))
                    test_passed = wait_and_check(args, processes)
                    if test_passed is not None and not test_passed:
                        fail_count += 1

    while len(processes) > 0:
        test_passed = wait_and_check(args, processes, force=True)
        if test_passed is not None and not test_passed:
            fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count)
                     + " tests passed successfully", crash=False)
def wait_and_check(args, processes, force=False):
    if not force and len(processes) < args.max_threads:
        return None

    test_path = processes[0][0]
    test_name = test_path.split('/')[-1]
    log_file_name = processes[0][1]
    p = processes[0][2]
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    screen.log('Results for {}: '.format(test_name[13:]))
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode), crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join('./rl_coach', 'traces', test_name[13:])
        if not os.path.exists(trace_path):
            screen.log('No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(trace_path)
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            try:
                df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            except:
                pass
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
            trace_csv_path = trace_csv_path[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
                test_passed = True
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    processes.pop(0)
    return test_passed
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--preset',
                        help="(string) Name of a preset to run (as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-ip', '--ignore_presets',
                        help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-v', '--verbose',
                        help="(flag) display verbose logs in the event of an error",
                        action='store_true')
    parser.add_argument('--stop_after_first_failure',
                        help="(flag) stop executing tests after the first error",
                        action='store_true')
    parser.add_argument('-ow', '--overwrite',
                        help="(flag) overwrite old trace with new ones in trace testing mode",
                        action='store_true')
    parser.add_argument('-prl', '--parallel',
                        help="(flag) run tests in parallel",
                        action='store_true')
    parser.add_argument('-mt', '--max_threads',
                        help="(int) maximum number of threads to run in parallel",
                        default=multiprocessing.cpu_count() - 2,
                        type=int)
    args = parser.parse_args()

    if not args.parallel:
        args.max_threads = 1

    if args.preset is not None:
        presets_lists = [args.preset]
    else:
        presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
                         if f[-3:] == '.py' and not f == '__init__.py']

    fail_count = 0
    test_count = 0

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []

    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module('rl_coach.presets.{}'.format(preset_name))
            except:
                screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            num_env_steps = preset_validation_params.trace_max_env_steps
            if preset_validation_params.test_using_a_trace_test:
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_path, log_file, p = run_trace_based_test(preset_name, num_env_steps, level)
                        processes.append((test_path, log_file, p))
                        test_passed = wait_and_check(args, processes)
                        if test_passed is not None and not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_path, log_file, p = run_trace_based_test(preset_name, num_env_steps)
                    processes.append((test_path, log_file, p))
                    test_passed = wait_and_check(args, processes)
                    if test_passed is not None and not test_passed:
                        fail_count += 1

    while len(processes) > 0:
        test_passed = wait_and_check(args, processes, force=True)
        if test_passed is not None and not test_passed:
            fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count)
                     + " tests passed successfully")
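# `run_trace_based_test()` is used by both parallel `main()` variants above but not defined in this
# section; a sketch, assuming it launches coach asynchronously with the same custom parameters as
# the synchronous `perform_trace_based_tests()` earlier, and returns the (test_path, log_file,
# process) triple that `wait_and_check()` pops off the `processes` queue. The '__test_trace_'
# prefix is 13 characters, which matches the `test_name[13:]` slicing in `wait_and_check()`:
def run_trace_based_test(preset_name, num_env_steps, level=None):
    test_name = '__test_trace_' + (preset_name + '_' + level.replace(':', '_') if level else preset_name)
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate process and return without waiting for it
    screen.log_title("Running test {}{}".format(preset_name, ' - ' + level if level else ''))
    log_file_name = 'test_log_{}.txt'.format(test_name[13:])
    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} -e {test_name} --seed 42 -c --no_summary '
           '-cp "improve_steps=EnvironmentSteps({n});'
           'steps_between_evaluation_periods=EnvironmentSteps({n});'
           'evaluation_steps=EnvironmentSteps(1);'
           'heatup_steps=EnvironmentSteps(1024)" '
           '{level} &> {log_file_name}').format(
               preset_name=preset_name, test_name=test_name, n=num_env_steps,
               level='-lvl ' + level if level else '', log_file_name=log_file_name)
    p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)
    return test_path, log_file_name, p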