# DQN training-script setup.
# NOTE(review): this fragment had its newlines stripped; statement boundaries
# below were restored from the token stream — no tokens were added or removed
# except `dict()` -> `{}`.

import os
import sys

# Make the project root importable when this file is run directly.
sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir))

from torch.utils.tensorboard import SummaryWriter

from src.env.grid_env import GridEnv
from src.agent import Agent

import numpy as np
import pygame

visualise = True
grid_env = GridEnv(visualize=visualise,
                   field_file_path="src/data/ori_data.txt",
                   max_step=500)

# DQN hyper-parameters.
BATCH_SIZE = 256
GAMMA = 0.8
# NOTE(review): EPS_START < EPS_END is unusual for epsilon-greedy schedules
# (exploration normally starts high and decays low) — confirm these values
# against how the Agent consumes them.
EPS_START = 0.5
EPS_END = 0.8
EPS_DECAY = 200
TARGET_UPDATE = 10  # target-network sync period (episodes or steps — see Agent)
num_episodes = 5000000

best = {}  # best results seen so far; {} is the idiomatic empty dict
dqn_agent = Agent()
writer = SummaryWriter(log_dir=os.path.join("log"))
# Actor-critic training script (fragment).
# NOTE(review): the start of this fragment — its imports and the opening of
# the `params` dict — was lost when the file's newlines were stripped (the
# surviving line began with `#`, turning the whole fragment into a comment).
# Reconstructed below. Assumed imports: os, Painter, GridEnv, Agent,
# SummaryWriter — confirm against the source repository.

params = {
    # TODO(review): the two keys below are referenced later in this fragment
    # but their original values were lost; values copied from the sibling
    # DQN script — confirm.
    'visualise': True,
    'num_episodes': 5000000,
    # output
    'output_folder': "output_actor_critic",
    'log_folder': 'log',
    'model_folder': 'model',
    'memory_config_dir': "memory_config"
}

# Nest log/model folders under the output folder and ensure they exist.
params['log_folder'] = os.path.join(params['output_folder'], params['log_folder'])
params['model_folder'] = os.path.join(params['output_folder'], params['model_folder'])
os.makedirs(params['log_folder'], exist_ok=True)
os.makedirs(params['model_folder'], exist_ok=True)

painter = Painter(params) if params['visualise'] else None
grid_env = GridEnv(params, painter)

# Path of a pre-trained DQN checkpoint (not loaded in the visible fragment).
model_path = os.path.join(params['output_folder'], "model",
                          "Agent_dqn_state_dict_1600.mdl")

agent_ac = Agent(params, painter)
writer = SummaryWriter(log_dir=params['log_folder'])

# Per-episode statistics accumulated across training.
all_mean_rewards = []
all_mean_actor_loss = []
all_mean_critic_loss = []
time_step = 0

for i_episode in range(params['num_episodes']):
    observed_map, robot_pose = grid_env.reset()
    done = False
    rewards = []
    actor_losses = []
    # NOTE(review): the remainder of the episode loop body is truncated in
    # the visible fragment.
# CUDA-aware training script (fragment).
# NOTE(review): the start of this fragment — its imports and the opening of
# the `params` dict — was lost when the file's newlines were stripped.
# Reconstructed below. Assumed imports: os, torch, Painter, GridEnv, Agent,
# SummaryWriter — confirm against the source repository.

params = {
    # TODO(review): the three keys below are referenced later in this
    # fragment but their original values were lost — confirm.
    'output_folder': "output",
    'log_folder': 'log',
    'visualise': True,
    'model_folder': 'model',
    'memory_config_dir': "memory_config",
    'use_cuda': True
}

# Nest log/model folders under the output folder and ensure they exist.
params['log_folder'] = os.path.join(params['output_folder'], params['log_folder'])
params['model_folder'] = os.path.join(params['output_folder'], params['model_folder'])
os.makedirs(params['log_folder'], exist_ok=True)
os.makedirs(params['model_folder'], exist_ok=True)

painter = Painter(params) if params['visualise'] else None
grid_env = GridEnv(params, painter)

# Train on the GPU only when one is available AND the config opts in.
train_device = 'cuda' if torch.cuda.is_available() and params['use_cuda'] else 'cpu'

writer = SummaryWriter(log_dir=params['log_folder'])
player = Agent(params=params,
               writer=writer,
               train_agent=True,
               is_resume=False,
               filepath=None,
               train_device=torch.device(train_device))

total_rewards, smoothed_rewards = [], []
global_step = 0