# Sweep every (sample size, max roll-outs, significance level) combination from
# `configs`, evaluate each one, and accumulate the per-config results.
# `eval_count` is the total number of configs, used to size the progress bar.
pbar_evals = tqdm.tqdm(total=eval_count, desc="Evaluations")
for sample_size in configs['S']:
    for rollout_max in configs['Roll-outs']:
        for sig_lvl in configs['Significance']:
            # All optional tracking/plot output is silenced for the sweep.
            tracking_flags = dict(
                rollout_tracking=False,
                dataset_tracking=False,
                train_plot_tracking=False,
                eval_summary_tracking=False,
                show_experiment_eval_plot=False,
            )
            run_results = evaluations_per_config(
                s_size=sample_size,
                n_actions=configs['Actions'][0],
                max_n_rollouts=rollout_max,
                sig_lvl=sig_lvl,
                max_policy_iter_per_run=10,
                runs_per_config=10,
                off_policy_explr=EXPLORE_LOGIC,
                **tracking_flags,
            )
            agg_results.append(run_results)
            pbar_evals.update(1)
pbar_evals.close()

# Save the evaluation results
results_dfs = []
# Full evaluation call for one parameter configuration; expects the sweep
# variables (sample_size, rollout_max, sig_lvl) to be bound by the caller/loop.
run_results = evaluations_per_config(
    s_size=sample_size,
    # init_state_path=configs['init_state_path'],  # use a pre-designed init-state config
    n_actions=configs['Actions'][0],
    max_n_rollouts=rollout_max,
    sig_lvl=sig_lvl,
    max_policy_iter_per_run=10,       # maximum number of policy iterations per experiment
    runs_per_config=10,               # number of experiments per parameter config
    eval_runs_per_state=100,          # episodes to generate from each init. state (during evaluation)
    off_policy_explr=EXPLORE_LOGIC,   # which exploration algorithm to use
    rollout_tracking=False,           # show roll-out info
    dataset_tracking=False,           # show the training dataset
    train_plot_tracking=False,        # show the model-training plot
    eval_summary_tracking=False,      # show a policy-performance summary of evaluation runs
    policy_behaviour_tracking=False,  # show/store policy action selections vs. pendulum angle plot
    show_experiment_run_eval_summary_plot=False,  # show SR vs. action no. plot of the experiment run
)