Example #1
    def run(self):
        """This is the main function for running a complete clone, build, flash
        and test job."""
        global export_history
        current_status = status.d[status.RUNNING]

        log.debug("Job/{} : {}".format(current_status, self.job))
        time_start = time.time()

        pr_id = self.job.pr_id()
        pr_sha1 = self.job.pr_sha1()

        # Mark the job as running, both in the database and on the GitHub PR.
        db.update_job(pr_id, pr_sha1, current_status, "N/A")
        github.update_state(self.job.payload, "pending", "Job running!")

        # start_job() carries out the job and returns a status code, which is
        # mapped to its printable form via the status dictionary.
        current_status = status.d[self.start_job()]

        # Clear the global export history now that the job has finished.
        export_history.clear()

        # Record the final status together with the total running time.
        running_time = utils.get_running_time(time_start)
        log.debug("Job/{} : {} --> {}".format(current_status, self.job,
                  running_time))
        db.update_job(pr_id, pr_sha1, current_status, running_time)
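
The method above depends on several project-specific helpers (status, db, github, utils, log and the global export_history) whose definitions are not part of the snippet. A minimal sketch of stand-ins, inferred purely from how they are called here (the names, the RUNNING constant and all return values are assumptions, not the project's real API), could look like this:

# Hypothetical stand-ins, inferred from the calls in run(); not the real modules.
import logging
import time

log = logging.getLogger("job")

class status:
    RUNNING, SUCCESS, FAILURE = range(3)
    d = {RUNNING: "Running", SUCCESS: "Success", FAILURE: "Failure"}

export_history = []  # shared history that run() clears after each job

class db:
    @staticmethod
    def update_job(pr_id, pr_sha1, job_status, running_time):
        print("db: {}@{} -> {} ({})".format(pr_id, pr_sha1, job_status,
                                            running_time))

class github:
    @staticmethod
    def update_state(payload, state, description):
        print("github: {} - {}".format(state, description))

class utils:
    @staticmethod
    def get_running_time(time_start):
        return "{:.1f}s".format(time.time() - time_start)

With stand-ins like these, the method can be exercised outside the CI service, for example from a unit test.
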
Example #2
def run_no_baseline(discount_factors, learn_rates, hidden_dims, init_temps,
                    stochasticity, n_runs, n_episodes):
    # no baseline
    best_result = 0
    best_settings = dict()
    results_file = f'results/s{stochasticity}_no_baseline.csv'
    best_settings_file = f'results/s{stochasticity}_no_baseline_best_settings.pkl'

    with open(results_file, 'w') as f:
        f.write('discount_factor,learn_rate,hidden_dim,init_temp,result' +
                '\n')

    for discount_factor in discount_factors:
        for learn_rate in learn_rates:
            for hidden_dim in hidden_dims:
                for init_temp in init_temps:
                    print('#' * 30)
                    print('#' * 9 + ' NEW SEARCH ' + '#' * 9)
                    print('#' * 30)
                    print()

                    st = time()

                    # change this for learned baseline
                    print(
                        f'Search settings: baseline=run_episodes_no_baseline, discount_factor={discount_factor}, learn_rate={learn_rate}, hidden_dim={hidden_dim}, init_temp={init_temp}'
                    )

                    # initialize the environment
                    env = gym.make('CartPole-v1')  # <---------- change this!

                    result = 0

                    for i in range(n_runs):
                        start_time = time()

                        policy_model = PolicyNetwork(
                            input_dim=4, hidden_dim=hidden_dim, output_dim=2
                        )  # change input_ and output_dim for gridworld env
                        seed = 40 + i
                        set_seeds(env, seed)

                        episode_durations, _ = run_episodes_no_baseline(
                            policy_model, env, n_episodes, discount_factor,
                            learn_rate, init_temp, stochasticity)
                        result += np.mean(episode_durations)

                        del policy_model

                        end_time = time()
                        h, m, s = get_running_time(end_time - start_time)

                        print(
                            f'Done with run {i+1}/{n_runs} in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds'
                        )

                    env.close()
                    result /= n_runs

                    with open(results_file, 'a') as f:
                        f.write(
                            f'{discount_factor},{learn_rate},{hidden_dim},{init_temp},{result}'
                            + '\n')

                    et = time()
                    h, m, s = get_running_time(et - st)

                    print(
                        f'Done with search in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds'
                    )
                    print(f'Average number of steps per episode: {result}')

                    if result > best_result:
                        best_result = result
                        best_settings['discount_factor'] = discount_factor
                        best_settings['learn_rate'] = learn_rate
                        best_settings['hidden_dim'] = hidden_dim
                        best_settings['init_temp'] = init_temp
                        best_settings['result'] = best_result

                        with open(best_settings_file, 'wb') as pickle_file:
                            pkl.dump(best_settings, pickle_file)

                        print(f'New best result!: {result}')
                        print(f'New best settings!: {best_settings}')
                    print()

    print()
    print()
    print(f'Best settings after completing grid search: {best_settings}')


# Choose what to run by uncommenting
#run_no_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
#run_learned_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
#run_selfcritic_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
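
The commented-out calls above use the grid-search signature, while the run_selfcritic_baseline defined below is an evaluation variant that only takes (stochasticity, n_runs, n_episodes) and loads the best settings found by the earlier parameter search. As a usage sketch for the grid search itself, a small driver along the following lines could be used; the grid values and episode counts are made-up examples, not the settings used in the project:

import os

# Hypothetical grid values, illustrative only.
discount_factors = [0.90, 0.99]
learn_rates = [1e-3, 1e-2]
hidden_dims = [64, 128]
init_temps = [1.0]
stochasticity = 0      # assumed encoding: 0 = deterministic environment
n_runs = 3
n_episodes = 500

# run_no_baseline() writes its CSV into results/, so make sure it exists.
os.makedirs('results', exist_ok=True)

run_no_baseline(discount_factors, learn_rates, hidden_dims, init_temps,
                stochasticity, n_runs, n_episodes)
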
def run_selfcritic_baseline(stochasticity, n_runs, n_episodes):
    # self-critic baseline
    dir_path = os.path.dirname(os.path.realpath(__file__))
    best_settings_file = os.path.join(
        dir_path, 'cart_pole_parameter_search',
        f's{stochasticity}_SC_baseline_best_settings.pkl')
    eval_file = f'cart_evals/s{stochasticity}_SC_baseline.pkl'

    with open(best_settings_file, 'rb') as pickle_file:
        best_settings = pkl.load(pickle_file)
    discount_factor = best_settings['discount_factor']
    learn_rate = best_settings['learn_rate']
    hidden_dim = best_settings['hidden_dim']
    init_temp = best_settings['init_temp']

    st = time()

    # change this for learned baseline
    print(
        f'Run settings: baseline=run_episodes_with_SC_baseline, discount_factor={discount_factor}, learn_rate={learn_rate}, hidden_dim={hidden_dim}, init_temp={init_temp}'
    )

    # initialize the environment
    env = gym.make('CartPole-v1')

    episode_durations_list = []
    reinforce_loss_list = []

    for i in range(n_runs):
        start_time = time()

        policy_model = PolicyNetwork(
            input_dim=4, hidden_dim=hidden_dim,
            output_dim=2)  # change input_ and output_dim for gridworld env
        seed = 40 + i
        set_seeds(env, seed)

        episode_durations, reinforce_loss = run_episodes_with_SC_baseline(
            policy_model, env, n_episodes, discount_factor, learn_rate,
            init_temp, stochasticity)

        episode_durations_list.append(episode_durations)
        reinforce_loss_list.append(reinforce_loss)

        del policy_model

        end_time = time()
        h, m, s = get_running_time(end_time - start_time)

        print(
            f'Done with run {i+1}/{n_runs} in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds'
        )

    env.close()

    et = time()
    h, m, s = get_running_time(et - st)

    evals = {}
    evals['episode_durations'] = episode_durations_list
    evals['reinforce_loss'] = reinforce_loss_list

    with open(eval_file, 'wb') as pickle_file:
        pkl.dump(evals, pickle_file)

    print(
        f'Done with runs in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds'
    )
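
Since the evaluation results are pickled to eval_file, they can be read back and summarised afterwards. A short sketch, assuming the same stochasticity value as the run and that every run logged the same number of episodes:

import pickle as pkl
import numpy as np

stochasticity = 0  # must match the value used for the evaluation run
with open(f'cart_evals/s{stochasticity}_SC_baseline.pkl', 'rb') as f:
    evals = pkl.load(f)

# One row per run, one column per episode (assumes equal-length runs).
durations = np.array(evals['episode_durations'])
print('mean episode duration per run:', durations.mean(axis=1))
print('overall mean episode duration:', durations.mean())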