def main():
    """Entry point: parse CLI args, load a JSON config, and run a training or
    evaluation session of the Extended Q-Learning agent in a Unity environment.

    Expects ``-c/--config`` naming a .json file under ``./configs``.
    """
    parser = argparse.ArgumentParser(description="Run Extended Q-Learning with given config")
    parser.add_argument("-c", "--config", type=str, metavar="", required=True,
                        help="Config file name - file must be available as .json in ./configs")
    args = parser.parse_args()

    # load config files
    with open(os.path.join(".", "configs", args.config), "r") as read_file:
        config = json.load(read_file)

    # env_path is stored as a list of path segments in the config; join it
    # into a platform-appropriate path for the Unity binary.
    env = UnityEnvironment(file_name=os.path.join(*config["general"]["env_path"]))
    agent = Agent(config=config)

    # Ensure the Unity environment is shut down even if train/test raises,
    # otherwise the external simulator process can be left orphaned.
    try:
        if config["train"]["run_training"]:
            scores = sessions.train(agent, env, config)
            helper.plot_scores(scores)
            agent.save()
        else:
            # Evaluation mode: restore previously saved weights first.
            agent.load()
            sessions.test(agent, env)
    finally:
        env.close()
# Store the transition and, at episode end, log/checkpoint progress.
# NOTE(review): this is the interior of an episode loop; names such as
# o_t, a_t, r_tp1, c, agent, dqn_env, tsp_computer, max_reward, best_tsp,
# global_step, save_dir, log and the per-episode stat lists come from the
# enclosing scope (not visible in this chunk).
# Observations are reshaped to (x_dim, y_dim, 2) — presumably a 2-channel
# grid encoding; confirm against the environment definition.
o_t_reshaped = np.reshape(o_t, (x_dim, y_dim, 2))
o_tp1_reshaped = np.reshape(o_tp1, (x_dim, y_dim, 2))
agent.update_replay_memory(o_t_reshaped, a_t, r_tp1, o_tp1_reshaped, episode_done)
ep_reward += r_tp1

if not c.test and episode_done:
    # Cost of the route the agent actually walked this episode.
    tsp_cost = tsp_computer.rl_cost(dqn_env.steps)

    # New best episode reward -> checkpoint the model.
    if max_reward < ep_reward:
        str_out = 'max_reward={} < ep_reward={}'.format(max_reward, ep_reward)
        max_reward = ep_reward
        agent.save(global_step=global_step)
        log.debug(
            'saving graph {} -> steps: \n{}, \nstep_length: {}, rl_cost: {}, reward: {}, str_out={}'
            .format(global_step, dqn_env.steps, len(dqn_env.steps),
                    tsp_cost, ep_reward, str_out))

    # A complete tour (visited every coordinate) with a new best cost is
    # appended to the path log for later inspection.
    if len(dqn_env.steps) == len(tsp_computer.coords.keys()) and best_tsp > tsp_cost:
        best_tsp = tsp_cost
        with open(os.path.join(save_dir, "path.txt"), "a") as myfile:
            myfile.write('iteration {}, rl_cost {}: {}\n'.format(
                global_step, tsp_cost, dqn_env.steps))

    # Per-episode statistics. Reuse tsp_cost instead of recomputing
    # rl_cost(dqn_env.steps): steps has not changed since it was computed.
    completed_episodes += 1
    episode_reward.append(ep_reward)
    episode_length.append(t)
    rl_cost.append(tsp_cost)