Exemple #1
0
#Catch KeyboardInterrupts and save model
#i=-1
# try:
#     #Reinforcement Loop
#     #for i in tqdm.trange(n_episodes):
#     while True:
#        i += 1
# Episode loop: run `n_episodes` episodes, storing every transition both in a
# per-episode buffer (`shortterm_memory`) and in the shared `memory` buffer.
for i in range(n_episodes):
    info, reward, state = env.reset(
    )  # reset the environment; it hands back (info, reward, state) in that order
    j = 0  # step counter, incremented once per inner-loop iteration
    # A fresh buffer is created for every episode (constructed with max_size=256).
    shortterm_memory = ReplayMemory(max_size=256)
    # NOTE(review): no exit from this loop is visible in this excerpt;
    # presumably it breaks on `done` further down -- confirm.
    while True:
        j += 1
        # Let the agent choose an action for the current state.
        action = agent.step(state)

        # This env returns (done, reward, observation); the Gym-style unpacking
        # below is kept only as a reminder of the differing order.
        #observation, reward, done, info = env.step(action)
        done, base_reward, observation = env.step(action)

        # Derive the reward that is actually stored from the env's base reward
        # and the `done` flag, via the selected policy.
        reward = Policies.LiveLongAndProsper(base_reward, done)
        # Alternative: use the environment's reward unchanged.
        #reward = base_reward
        # Store the transition (state, action, reward, next observation, done)
        # in both buffers.
        transition = [state, action, reward, observation, done]
        shortterm_memory.store(*transition)
        memory.store(*transition)

        # The observation just received becomes the state for the next step.
        state = observation
Exemple #2
0
# When CUDA is available, clear PyTorch's cached GPU memory and move both of
# the agent's networks onto the GPU.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("Running on GPU")
    # Same order as before: target network first, then policy network.
    for _net in (agent.target_net, agent.policy_net):
        _net.cuda()

# Reinforcement loop: run `n_episodes` episodes and record rewards in `R`.
# No transitions are stored in this loop.
for i in range(n_episodes):
    info, reward, state = env.reset(
    )  # reset the environment; it hands back (info, reward, state) in that order
    j = 0  # step counter, incremented once per inner-loop iteration
    # NOTE(review): no exit from this loop is visible in this excerpt;
    # presumably it breaks on `done` or a step limit further down -- confirm.
    while True:
        j += 1
        # Let the agent choose an action for the current state.
        # decay_enabled=False presumably switches off exploration decay --
        # confirm against agent.step.
        action = agent.step(state, decay_enabled=False)

        # This env returns (done, reward, observation); the Gym-style unpacking
        # below is kept only as a reminder of the differing order.
        #observation, reward, done, info = env.step(action)
        done, reward, observation = env.step(action)

        # Reward shaping is disabled here; the environment's reward is used as-is.
        #reward = Policies.SoreLoser(reward, done)

        # The observation just received becomes the state for the next step.
        state = observation

        # Save the reward for evaluation.
        # NOTE(review): R[i] is overwritten on every step, so it always holds
        # the most recent step's reward for episode i -- confirm this is
        # intended rather than a per-episode sum.
        R[i] = reward

        # Reset if the game lasts too long:
        # protects against an environment bug where agents can be trapped outside the arena.