import argparse
import json
import os

# Non-stdlib imports: UnityEnvironment is provided by Unity ML-Agents
# (here via the unityagents package); Agent, sessions, and helper are
# assumed to be project-local modules of this repository.
from unityagents import UnityEnvironment

from agent import Agent

import helper
import sessions


def main():
    parser = argparse.ArgumentParser(description="Run Extended Q-Learning with given config")
    parser.add_argument("-c",
                        "--config",
                        type=str,
                        metavar="",
                        required=True,
                        help="Config file name - file must be available as .json in ./configs")

    args = parser.parse_args()

    # load config files
    with open(os.path.join(".", "configs", args.config), "r") as read_file:
        config = json.load(read_file)

    env = UnityEnvironment(file_name=os.path.join(*config["general"]["env_path"]))
    agent = Agent(config=config)

    if config["train"]["run_training"]:
        scores = sessions.train(agent, env, config)
        helper.plot_scores(scores)
        agent.save()
    else:
        agent.load()
        sessions.test(agent, env)

    env.close()


if __name__ == "__main__":
    main()
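The script expects a JSON file in ./configs containing at least the keys read above; note that env_path is a list of path components because it is unpacked into os.path.join. A minimal sketch (file name, environment path, and all values are illustrative, not taken from the original project):

{
    "general": {
        "env_path": ["envs", "MyEnv_Linux", "MyEnv.x86_64"]
    },
    "train": {
        "run_training": true
    }
}

Assuming the script is saved as main.py, it would then be invoked as: python main.py -c my_config.json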
Code example #2
        # Fragment from inside the agent-environment step loop: o_t / o_tp1
        # are the current and next observations, a_t the action taken and
        # r_tp1 the reward received (requires numpy imported as np).
        # Reshape the flat observations into 2-channel (x_dim, y_dim, 2)
        # grids before storing them.
        o_t_reshaped = np.reshape(o_t, (x_dim, y_dim, 2))
        o_tp1_reshaped = np.reshape(o_tp1, (x_dim, y_dim, 2))

        # Store the transition (s, a, r, s', done) in the replay memory
        agent.update_replay_memory(o_t_reshaped, a_t, r_tp1, o_tp1_reshaped,
                                   episode_done)

        ep_reward += r_tp1

        # End-of-episode bookkeeping (training runs only)
        if not c.test and episode_done:
            tsp_cost = tsp_computer.rl_cost(dqn_env.steps)

            # Checkpoint the model whenever a new best episode reward is seen
            if max_reward < ep_reward:
                str_out = 'max_reward={} < ep_reward={}'.format(
                    max_reward, ep_reward)
                max_reward = ep_reward
                agent.save(global_step=global_step)
                log.debug(
                    'saving graph {} -> steps: \n{}, \nstep_length: {}, rl_cost: {}, reward: {}, str_out={}'
                    .format(global_step, dqn_env.steps, len(dqn_env.steps),
                            tsp_cost, ep_reward, str_out))
            # Log the tour whenever it visits every city and beats the best
            # TSP cost found so far
            if (len(dqn_env.steps) == len(tsp_computer.coords)
                    and best_tsp > tsp_cost):
                best_tsp = tsp_cost
                with open(os.path.join(save_dir, "path.txt"), "a") as path_file:
                    path_file.write('iteration {}, rl_cost {}: {}\n'.format(
                        global_step, tsp_cost, dqn_env.steps))

            # Accumulate per-episode statistics
            completed_episodes += 1
            episode_reward.append(ep_reward)
            episode_length.append(t)
            rl_cost.append(tsp_cost)  # cost was already computed above
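For concreteness, the reshape at the top of this fragment turns a flat observation vector of length x_dim * y_dim * 2 into a two-channel grid. A self-contained illustration with made-up dimensions (the 4x4 grid size is an assumption for the demo, not taken from the project):

import numpy as np

x_dim, y_dim = 4, 4                              # illustrative grid size
o_t = np.arange(x_dim * y_dim * 2, dtype=float)  # stand-in for a flat observation
o_t_reshaped = np.reshape(o_t, (x_dim, y_dim, 2))
print(o_t_reshaped.shape)                        # -> (4, 4, 2)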