def run(self):
    """Main entry point: runs a complete clone, build, flash and test job."""
    global export_history

    current_status = status.d[status.RUNNING]
    log.debug("Job/{} : {}".format(current_status, self.job))

    time_start = time.time()
    pr_id = self.job.pr_id()
    pr_sha1 = self.job.pr_sha1()

    db.update_job(pr_id, pr_sha1, current_status, "N/A")
    github.update_state(self.job.payload, "pending", "Job running!")

    current_status = status.d[self.start_job()]
    export_history.clear()

    running_time = utils.get_running_time(time_start)
    log.debug("Job/{} : {} --> {}".format(current_status, self.job, running_time))
    db.update_job(pr_id, pr_sha1, current_status, running_time)
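# `utils.get_running_time(time_start)` above is assumed to format the elapsed
# wall-clock time since `time_start` as a human-readable string that is then
# logged and stored in the job database. A minimal sketch under that
# assumption (hypothetical, not the actual utils implementation):

import time

def get_running_time(time_start):
    """Return the time elapsed since `time_start` formatted as H:MM:SS."""
    elapsed = int(time.time() - time_start)
    hours, rest = divmod(elapsed, 3600)
    minutes, seconds = divmod(rest, 60)
    return "{}:{:02d}:{:02d}".format(hours, minutes, seconds)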
def run_no_baseline(discount_factors, learn_rates, hidden_dims, init_temps,
                    stochasticity, n_runs, n_episodes):
    """Grid search over hyperparameters for REINFORCE without a baseline."""
    best_result = 0
    best_settings = dict()
    results_file = f'results/s{stochasticity}_no_baseline.csv'
    best_settings_file = f'results/s{stochasticity}_no_baseline_best_settings.pkl'
    with open(results_file, 'w') as f:
        f.write('discount_factor,learn_rate,hidden_dim,init_temp,result' + '\n')
    for discount_factor in discount_factors:
        for learn_rate in learn_rates:
            for hidden_dim in hidden_dims:
                for init_temp in init_temps:
                    print('#' * 30)
                    print('#' * 9 + ' NEW SEARCH ' + '#' * 9)
                    print('#' * 30)
                    print()
                    st = time()
                    # change this for learned baseline
                    print(f'Search settings: baseline=run_episodes_no_baseline, '
                          f'discount_factor={discount_factor}, learn_rate={learn_rate}, '
                          f'hidden_dim={hidden_dim}, init_temp={init_temp}')
                    # initialize the environment
                    env = gym.make('CartPole-v1')  # <---------- change this!
                    result = 0
                    for i in range(n_runs):
                        start_time = time()
                        # change input_dim and output_dim for gridworld env
                        policy_model = PolicyNetwork(
                            input_dim=4, hidden_dim=hidden_dim, output_dim=2)
                        seed = 40 + i
                        set_seeds(env, seed)
                        episode_durations, _ = run_episodes_no_baseline(
                            policy_model, env, n_episodes, discount_factor,
                            learn_rate, init_temp, stochasticity)
                        result += np.mean(episode_durations)
                        del policy_model
                        end_time = time()
                        h, m, s = get_running_time(end_time - start_time)
                        print(f'Done with run {i + 1}/{n_runs} in '
                              f'{f"{h} hours, " if h else ""}'
                              f'{f"{m} minutes and " if m else ""}{s} seconds')
                    env.close()
                    result /= n_runs
                    with open(results_file, 'a') as f:
                        f.write(f'{discount_factor},{learn_rate},'
                                f'{hidden_dim},{init_temp},{result}' + '\n')
                    et = time()
                    h, m, s = get_running_time(et - st)
                    print(f'Done with search in '
                          f'{f"{h} hours, " if h else ""}'
                          f'{f"{m} minutes and " if m else ""}{s} seconds')
                    print(f'Average number of steps per episode: {result}')
                    if result > best_result:
                        best_result = result
                        best_settings['discount_factor'] = discount_factor
                        best_settings['learn_rate'] = learn_rate
                        best_settings['hidden_dim'] = hidden_dim
                        best_settings['init_temp'] = init_temp
                        best_settings['result'] = best_result
                        with open(best_settings_file, 'wb') as f:
                            pkl.dump(best_settings, f)
                        print(f'New best result!: {result}')
                        print(f'New best settings!: {best_settings}')
                        print()
                    print()
                    print()
    print(f'Best settings after completing grid search: {best_settings}')


# Choose what to run by uncommenting
#run_no_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
#run_learned_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
#run_selfcritic_baseline(stochasticity, n_runs, n_episodes)
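# `set_seeds(env, seed)` is called above but not defined in this file. It is
# assumed to seed every source of randomness so that each of the `n_runs`
# repetitions is reproducible. A minimal sketch under that assumption (the
# `env.seed` call follows the pre-0.26 gym API this script appears to use):

import random
import numpy as np
import torch

def set_seeds(env, seed):
    """Seed Python, NumPy, PyTorch and the gym environment."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    env.seed(seed)  # assumption: legacy gym API; newer gym passes seed to env.reset()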
def run_selfcritic_baseline(stochasticity, n_runs, n_episodes):
    """Evaluate REINFORCE with the self-critic baseline, using the best
    hyperparameters found by the grid search."""
    dir_path = os.path.dirname(os.path.realpath(__file__))
    best_settings_file = (dir_path + f'/cart_pole_parameter_search/'
                          f's{stochasticity}_SC_baseline_best_settings.pkl')
    eval_file = f'cart_evals/s{stochasticity}_SC_baseline.pkl'
    with open(best_settings_file, 'rb') as pickle_file:
        best_settings = pkl.load(pickle_file)
    discount_factor = best_settings['discount_factor']
    learn_rate = best_settings['learn_rate']
    hidden_dim = best_settings['hidden_dim']
    init_temp = best_settings['init_temp']
    st = time()
    print(f'Run settings: baseline=run_episodes_with_SC_baseline, '
          f'discount_factor={discount_factor}, learn_rate={learn_rate}, '
          f'hidden_dim={hidden_dim}, init_temp={init_temp}')
    # initialize the environment
    env = gym.make('CartPole-v1')
    episode_durations_list = []
    reinforce_loss_list = []
    for i in range(n_runs):
        start_time = time()
        # change input_dim and output_dim for gridworld env
        policy_model = PolicyNetwork(
            input_dim=4, hidden_dim=hidden_dim, output_dim=2)
        seed = 40 + i
        set_seeds(env, seed)
        episode_durations, reinforce_loss = run_episodes_with_SC_baseline(
            policy_model, env, n_episodes, discount_factor, learn_rate,
            init_temp, stochasticity)
        episode_durations_list.append(episode_durations)
        reinforce_loss_list.append(reinforce_loss)
        del policy_model
        end_time = time()
        h, m, s = get_running_time(end_time - start_time)
        print(f'Done with run {i + 1}/{n_runs} in '
              f'{f"{h} hours, " if h else ""}'
              f'{f"{m} minutes and " if m else ""}{s} seconds')
    env.close()
    et = time()
    h, m, s = get_running_time(et - st)
    evals = {'episode_durations': episode_durations_list,
             'reinforce_loss': reinforce_loss_list}
    with open(eval_file, 'wb') as f:
        pkl.dump(evals, f)
    print(f'Done with runs in '
          f'{f"{h} hours, " if h else ""}'
          f'{f"{m} minutes and " if m else ""}{s} seconds')
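# Both experiment functions unpack `get_running_time(duration)` into hours,
# minutes and seconds, and build a `PolicyNetwork` that maps CartPole's
# 4-dimensional observation to probabilities over its 2 actions. Minimal
# sketches under those assumptions (hypothetical, not the originals):

import torch.nn as nn
import torch.nn.functional as F

def get_running_time(duration):
    """Split a duration in seconds into (hours, minutes, seconds)."""
    h, rest = divmod(int(duration), 3600)
    m, s = divmod(rest, 60)
    return h, m, s

class PolicyNetwork(nn.Module):
    """Two-layer MLP producing a softmax distribution over actions."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        return F.softmax(self.fc2(F.relu(self.fc1(x))), dim=-1)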