    # if d > intruder.radius:
    cos_theta = np.sqrt(d**2 - intruder.radius**2) / d
    sin_beta1 = sin_alpha * cos_theta - cos_alpha * sin_theta  # beta1 = alpha - theta
    cos_beta1 = cos_alpha * cos_theta + sin_alpha * sin_theta
    sin_beta2 = sin_alpha * cos_theta + cos_alpha * sin_theta  # beta2 = alpha + theta
    cos_beta2 = cos_alpha * cos_theta - sin_alpha * sin_theta

    T1x = cx + R * cos_beta1
    T1y = cy + R * sin_beta1
    T2x = cx + R * cos_beta2
    T2y = cy + R * sin_beta2

    T1 = (T1x, T1y)
    T2 = (T2x, T2y)
    return T1, T2


if __name__ == "__main__":
    action_std = 0.5

    envs = Env.envs()            # pixels = 80*80
    envs_dest = envDest.envs()
    action_dim = envs.action_size
    agent_num = envs.num_agents

    Test = test()
    ppo = ActorCritic(action_dim, action_std).to(device)
    ppo.load_state_dict(torch.load('./PPO_continuous.pth'))

    moving, count = Test.run(ppo)
    np.savetxt('test_moving.csv', moving)
    plt_path(moving)
    print("conflict number: {}".format(count))
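# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original script): the code above builds
# the two tangent points of the intruder's circle by rotating a reference
# bearing alpha by +/- theta. The self-contained helper below reproduces the
# same construction with the angle measured at the circle centre, which makes
# the perpendicularity of the radius and the tangent line easy to check. The
# function names and the sample numbers are assumptions for illustration only.
import numpy as np


def _tangent_points_sketch(px, py, cx, cy, r):
    dx, dy = px - cx, py - cy            # circle centre -> ownship
    d = np.hypot(dx, dy)
    if d <= r:                           # no tangent line from inside the circle
        return None
    alpha = np.arctan2(dy, dx)           # bearing of the ownship from the centre
    phi = np.arccos(r / d)               # angle at the centre between C->P and C->T
    t1 = (cx + r * np.cos(alpha - phi), cy + r * np.sin(alpha - phi))
    t2 = (cx + r * np.cos(alpha + phi), cy + r * np.sin(alpha + phi))
    return t1, t2


def _check_tangent_sketch():
    # At a true tangent point T, the radius C->T is perpendicular to P->T,
    # so their dot product should be ~0 up to floating-point error.
    (t1x, t1y), _ = _tangent_points_sketch(px=10.0, py=0.0, cx=0.0, cy=0.0, r=3.0)
    radius = np.array([t1x, t1y])                # C->T (centre at the origin)
    tangent = np.array([t1x - 10.0, t1y - 0.0])  # P->T
    return float(np.dot(radius, tangent))        # expected: ~0.0
# ---------------------------------------------------------------------------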
def main():
    ################### Hyperparameters ##################
    solved_reward = -0.5         # stop training once avg reward exceeds this
    log_interval = 20            # print avg reward in the interval
    max_episodes = 2000          # max training episodes
    max_timesteps = 200          # max timesteps in one episode
    update_timestep = 500        # update policy every n timesteps
    action_std = 0.5             # constant std for action distribution (Multivariate Normal)
    K_epochs = 10                # update policy for K epochs
    eps_clip = 0.2               # clip parameter for PPO
    gamma = 0.99                 # discount factor
    # parameters for the Adam optimizer
    lr = 0.0003
    betas = (0.9, 0.999)
    ######################################################

    envs = Env.envs()            # pixels = 80*80
    envs_dest = envDest.envs()
    action_dim = envs.action_size
    agent_num = envs.num_agents

    memory = Memory()
    ppo = PPO(action_dim, action_std, lr, betas, gamma, K_epochs, eps_clip)
    print(lr, betas)

    # logging variables
    running_reward = 0
    avg_length = 0
    time_step = 0
    record_reward = []

    # training loop
    for i_episode in range(1, max_episodes + 1):
        envs.reset()
        envs_dest.reset()
        frame, _, reward, _ = envs.step([2] * agent_num)
        _, frame_dest, _, _ = envs_dest.step([2] * agent_num)
        state = preprocess_batch([frame, frame_dest])

        for t in range(max_timesteps):
            time_step += 1

            # running policy_old to sample the next action
            action, _ = ppo.select_action(state, memory)
            frame, frame_dest, reward, done = envs.step(action)
            _, frame_dest, _, _ = envs_dest.step(action)
            state = preprocess_batch([frame, frame_dest])

            # saving reward and is_terminals
            memory.rewards.append(reward)
            memory.is_terminals.append(done)

            # update the policy if it is time
            if time_step % update_timestep == 0:
                ppo.update(memory)
                memory.clear_memory()
                time_step = 0

            running_reward += reward
            if done.any():
                break

        avg_length += t
        # plt.imshow(frame_dest[0, :, :, :])

        if i_episode % 500 == 0:
            torch.save(ppo.policy.state_dict(), './PPO_continuous.pth')

        if i_episode % log_interval == 0:
            avg_length = int(avg_length / log_interval)
            running_reward = np.mean(running_reward) / log_interval
            record_reward.append(running_reward)
            print('Episode {} \t Avg reward: {}'.format(i_episode, running_reward))

            # stop training if avg_reward > solved_reward
            if running_reward > solved_reward:
                print("########## Solved! ##########")
                torch.save(ppo.policy.state_dict(), './PPO_continuous_solved.pth')
                break

            running_reward = 0
            avg_length = 0

    np.savetxt('data_no_action_penalty_2.csv', record_reward)

    Test = test()
    moving = Test.run(ppo)
    np.savetxt('test_moving.csv', moving)
    plt_path(moving)
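# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original script): main() appends one
# averaged reward per logging interval to record_reward and dumps it with
# np.savetxt('data_no_action_penalty_2.csv', ...). A minimal way to turn that
# file into a learning-curve plot is sketched below; the function name and the
# output filename are assumptions for illustration only.
def _plot_learning_curve(csv_path='data_no_action_penalty_2.csv',
                         log_interval=20,
                         out_path='learning_curve.png'):
    import numpy as np
    import matplotlib.pyplot as plt

    rewards = np.loadtxt(csv_path)                            # one value per logging interval
    episodes = np.arange(1, len(rewards) + 1) * log_interval  # episode index of each point
    plt.figure()
    plt.plot(episodes, rewards)
    plt.xlabel('episode')
    plt.ylabel('average reward per interval')
    plt.title('PPO training reward')
    plt.savefig(out_path)
    plt.close()
# ---------------------------------------------------------------------------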