action='store_true', help='Enable CUDA') parser.add_argument("--name", required=True, help="Name of the run") parser.add_argument("-n", required=True, help="Unroll parameter") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu") save_path = os.path.join("saves", "ddpg-" + args.name) os.makedirs(save_path, exist_ok=True) env = gym.make(ENV_ID) test_env = gym.make(ENV_ID) act_net = model.DDPGActor(env.observation_space.shape[0], env.action_space.shape[0]).to(device) crt_net = model.DDPGCritic(env.observation_space.shape[0], env.action_space.shape[0]).to(device) print(act_net) print(crt_net) tgt_act_net = ptan.agent.TargetNet(act_net) tgt_crt_net = ptan.agent.TargetNet(crt_net) writer = SummaryWriter(comment="-ddpg_" + args.name) agent = model.AgentDDPG(act_net, device=device) exp_source = ptan.experience.ExperienceSourceFirstLast(env, agent, gamma=GAMMA, steps_count=args.n) buffer = ptan.experience.ExperienceReplayBuffer(exp_source, buffer_size=REPLAY_SIZE) act_opt = optim.Adam(act_net.parameters(), lr=LEARNING_RATE) crt_opt = optim.Adam(crt_net.parameters(), lr=LEARNING_RATE)
steps += 1 # If done proceed to next try if is_done: break return rewards / count, steps / count

# Create buffer auxiliaries
# NOTE(review): the typename 'Episode' differs from the bound name
# `Experience`; pickling instances looks the class up by typename and would
# fail -- confirm whether the mismatch is intentional.
Experience = namedtuple('Episode', field_names=['state', 'action', 'reward', 'last_state', 'done'])

# Initialize simulator
sim = simulator.Agent(random(), random())

# Initialize networks and intelligent agents
act_net = model.DDPGActor(OBSERVATION_SPACE, ACTION_SPACE).to(device)
crt_net = model.DDPGCritic(OBSERVATION_SPACE, ACTION_SPACE).to(device)
tgt_act_net = ptan.agent.TargetNet(act_net)
tgt_crt_net = ptan.agent.TargetNet(crt_net)
agent = model.AgentDDPG(act_net, device=device)
act_opt = optim.Adam(act_net.parameters(), lr=LEARNING_RATE)
crt_opt = optim.Adam(crt_net.parameters(), lr=LEARNING_RATE)


# Define softmax function for discrete actions
def softmax_function(values):
    """Sample an action index from a floored softmax over *values*.

    The softmax of *values* is computed, every probability is raised to at
    least MIN_PROB_EXPLORATION, and the result is renormalised to sum to 1
    before one index is drawn with ``np.random.choice``.

    Args:
        values: non-empty sequence of scores, one per discrete action
            (assumed 1-D -- TODO confirm against callers).

    Returns:
        An index in ``range(len(values))``.
    """
    scores = np.asarray(values, dtype=np.float64)
    # Shifting by the maximum leaves the softmax unchanged but keeps np.exp
    # from overflowing to inf (and the probabilities from becoming nan).
    exp_scores = np.exp(scores - scores.max())
    # The normaliser is computed once, not once per element.
    probs = np.maximum(MIN_PROB_EXPLORATION, exp_scores / exp_scores.sum())
    # Flooring can push the total above 1, so renormalise before sampling.
    return np.random.choice(len(probs), p=probs / probs.sum())


buffer = []
iteration = 0