Example #1
def main():

    ray.shutdown()
    ray.init(include_webui=False,
             ignore_reinit_error=True,
             redis_max_memory=500000000,
             object_store_memory=5000000000)

    env = CartPoleEnv()
    env.reset()

    cw_num = 4
    ew_num = 4
    training_episodes, test_interval, trials = 10000, 50, 30
    agent = distributed_DQN_agent(env, hyperparams_CartPole, cw_num, ew_num,
                                  training_episodes, test_interval, trials)
    start_time = time.time()
    result = agent.learn_and_evaluate()
    run_time = {}
    run_time['distributed DQN agent'] = time.time() - start_time
    print("running time: ", run_time['distributed DQN agent'])

    plot_result(result, test_interval, ["batch_update with target_model"])
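
Example #1 passes a hyperparams_CartPole dict that is not shown here. Judging from the hyperparameter dicts in Examples #7 and #8, it would look roughly like the sketch below; the keys mirror those examples, but the concrete values are assumptions.

# Plausible hyperparams_CartPole, assembled from the keys the other examples
# read (epsilon schedule, replay memory, target-network settings). The values
# are assumptions, not taken from the original project.
hyperparams_CartPole = {
    'epsilon_decay_steps': 100000,
    'final_epsilon': 0.1,
    'batch_size': 32,
    'update_steps': 10,
    'memory_size': 2000,
    'beta': 0.99,
    'model_replace_freq': 2000,
    'learning_rate': 0.0003,
    'use_target_model': True,
    'max_episode_steps': 500,   # read by the evaluators in Example #2
    'eval_trials': 10,          # read by the evaluators in Example #2
}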
Example #2
    def __init__(self,
                 hyper_params,
                 memory_server,
                 nb_agents,
                 nb_evaluators,
                 action_space=len(ACTION_DICT)):
        self.beta = hyper_params['beta']
        self.initial_epsilon = 1
        self.final_epsilon = hyper_params['final_epsilon']
        self.epsilon_decay_steps = hyper_params['epsilon_decay_steps']
        self.hyper_params = hyper_params
        self.update_steps = hyper_params['update_steps']
        self.model_replace_freq = hyper_params['model_replace_freq']
        self.action_space = action_space
        self.batch_size = hyper_params['batch_size']
        self.memory_server = memory_server
        self.nb_agents = nb_agents
        self.nb_evaluators = nb_evaluators
        env = CartPoleEnv()
        state = env.reset()
        input_len = len(state)
        output_len = action_space
        self.eval_model = DQNModel(input_len,
                                   output_len,
                                   learning_rate=hyper_params['learning_rate'])
        self.target_model = DQNModel(input_len, output_len)

        self.agents = [
            DQN_agent_remote.remote(CartPoleEnv(), memory_server, hyper_params,
                                    action_space, i) for i in range(nb_agents)
        ]
        self.evaluators = [
            EvalWorker.remote(self.eval_model, CartPoleEnv(),
                              hyper_params['max_episode_steps'],
                              hyper_params['eval_trials'], i)
            for i in range(nb_evaluators)
        ]
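
The DQN_agent_remote and EvalWorker classes used above are not shown. A minimal sketch of what the collector actor could look like follows; only the @ray.remote decorator and the constructor arguments mirror Example #2, while the method name collect, the random placeholder policy, and the memory_server.add.remote(...) call are assumptions.

import random

import ray


@ray.remote
class DQN_agent_remote:
    # Hypothetical collector actor; the constructor signature mirrors the
    # call DQN_agent_remote.remote(CartPoleEnv(), memory_server, hyper_params,
    # action_space, i) in Example #2.
    def __init__(self, env, memory_server, hyper_params, action_space, worker_id):
        self.env = env
        self.memory_server = memory_server
        self.hyper_params = hyper_params
        self.action_space = action_space
        self.worker_id = worker_id

    def collect(self, num_episodes):
        # Roll out episodes with a random placeholder policy and push each
        # transition to the shared replay-memory actor.
        for _ in range(num_episodes):
            state = self.env.reset()
            done = False
            while not done:
                action = random.randrange(self.action_space)
                next_state, reward, done, _ = self.env.step(action)
                self.memory_server.add.remote(state, action, reward,
                                              next_state, done)
                state = next_state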
Example #3
    def learn_and_evaluate(self):
        worker_id = []
        # evaluators_id = []

        for i in range(self.cw_num):
            simulator = CartPoleEnv()
            worker_id.append(collecting_server.remote(simulator, self.model_server, self.memory, action_space=2))

        # ray.wait(collectors_id, len(collectors_id))
        #
        for j in range(self.ew_num):
            simulator = self.env
            worker_id.append(evaluation_server.remote(simulator, self.model_server))

        ray.wait(worker_id, len(worker_id))

        return ray.get(self.model_server.get_reuslts.remote())
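
ray.wait(worker_id, len(worker_id)) blocks until every collector and evaluator task has finished (the second argument is num_returns), and only then does ray.get pull the accumulated rewards off the model server; note that get_reuslts is kept verbatim from the example, so it has to match whatever method the model-server actor actually defines. A minimal, self-contained illustration of the wait-for-all-then-get pattern:

import ray

ray.init(ignore_reinit_error=True)


@ray.remote
def work(i):
    # Stand-in for a collecting/evaluation task.
    return i * i


task_ids = [work.remote(i) for i in range(4)]
# Block until all four tasks are done, then fetch their return values.
ready, _ = ray.wait(task_ids, num_returns=len(task_ids))
print(ray.get(ready))  # e.g. [0, 1, 4, 9] (order may vary)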
Example #4
    plt.xlabel("Episodes")
    plt.ylabel("Total Rewards")
    plt.savefig("Distributed_DQN_4_Collectors_4_Workers.png")
    plt.show()


ray.shutdown()
ray.init(include_webui=False, ignore_reinit_error=True, redis_max_memory=500000000, object_store_memory=5000000000)

from memory_remote import ReplayBuffer_remote
from dqn_model import _DQNModel
import torch
from custom_cartpole import CartPoleEnv
from collections import deque

simulator = CartPoleEnv()
result_folder = ENV_NAME
result_file = ENV_NAME + "/results4.txt"

if not os.path.isdir(result_folder):
    os.mkdir(result_folder)
torch.set_num_threads(12)

Memory_Server = ReplayBuffer_remote.remote(2000)


@ray.remote
class DQN_Model_Server():
    def __init__(self, env, hyper_params, batch_size, update_steps, memory_size, beta, model_replace_freq,
                 learning_rate, use_target_model=True, memory=Memory_Server, action_space=2,
                 training_episodes=7000, test_interval=50):
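
ReplayBuffer_remote is imported from memory_remote, which is not shown. A minimal sketch of such a shared replay-memory actor, built on the deque this example already imports, might look like this; the method names add, sample, and size are assumptions.

import random
from collections import deque

import ray


@ray.remote
class ReplayBuffer_remote:
    # Sketch of a shared replay memory; the capacity argument corresponds to
    # the ReplayBuffer_remote.remote(2000) call above.
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Return a random mini-batch, or None until enough data is stored.
        if len(self.buffer) < batch_size:
            return None
        return random.sample(self.buffer, batch_size)

    def size(self):
        return len(self.buffer)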
Example #5
from custom_cartpole import CartPoleEnv

# Set the Env name and action space for CartPole
ENV_NAME = 'CartPole_distributed'

# Set result saving folder
result_floder = ENV_NAME + "_distributed"
result_file = result_floder + "/results.txt"
if not os.path.isdir(result_floder):
    os.mkdir(result_floder)
torch.set_num_threads(12)

# Move left, Move right
ACTION_DICT = {"LEFT": 0, "RIGHT": 1}
# Register the environment
env = CartPoleEnv()

memory = ReplayBuffer_remote.remote(2000)


@ray.remote
class DQN_server(object):
    def __init__(self,
                 learning_rate,
                 training_episodes,
                 memory,
                 env,
                 test_interval=50,
                 batch_size=32,
                 action_space=len(ACTION_DICT),
                 beta=0.99):
Example #6
# Set the Env name and action space for CartPole
ENV_NAME = 'CartPole_distributed'

# Set result saving folder
result_floder = ENV_NAME
result_file = result_floder + "/results.txt"
if not os.path.isdir(result_floder):
    os.mkdir(result_floder)
torch.set_num_threads(12)

# Move left, Move right
ACTION_DICT = {"LEFT": 0, "RIGHT": 1}

# make environment
env_CartPole = CartPoleEnv()


def plot_result(total_rewards, learning_num, legend):
    print("\nLearning Performance:\n")
    episodes = []
    for i in range(len(total_rewards)):
        episodes.append(i * learning_num + 1)

    fig, ax = plt.subplots()  # one figure for the learning curve
    plt.plot(episodes, total_rewards)
    plt.title('performance')
    plt.legend(legend)
    plt.xlabel("Episodes")
    plt.ylabel("total rewards")
Example #7
    'batch_size': 32,
    'update_steps': 10,
    'memory_size': 2000,
    'beta': 0.99,
    'model_replace_freq': 2000,
    'learning_rate': 0.0003,
    'use_target_model': True
}

# =================== Initialize Environment ===================
# Set the Env name and action space for CartPole
ENV_NAME = 'CartPole_distributed'
# Move left, Move right
ACTION_DICT = {"LEFT": 0, "RIGHT": 1}
# Register the environment
env_CartPole = CartPoleEnv()

# =================== Ray Init ===================
ray.shutdown()
# ray.init(include_webui=False, ignore_reinit_error=True, redis_max_memory=500000000, object_store_memory=5000000000)
ray.init()


# =================== DQN ===================
class DQN_agent(object):
    def __init__(self, env, hyper_params, action_space=len(ACTION_DICT)):
        self.env = env
        self.max_episode_steps = env._max_episode_steps
        """
            beta: The discounted factor of Q-value function
            (epsilon): The explore or exploit policy epsilon. 
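
The epsilon the docstring starts to describe is typically annealed from initial_epsilon (set to 1 in Example #2) down to final_epsilon over epsilon_decay_steps steps. A small self-contained sketch of a linear schedule (the function name and exact decay shape are assumptions):

def linear_epsilon(step, initial_epsilon=1.0, final_epsilon=0.1,
                   epsilon_decay_steps=100000):
    # Linearly interpolate from initial_epsilon to final_epsilon, then hold.
    fraction = min(step / epsilon_decay_steps, 1.0)
    return initial_epsilon + fraction * (final_epsilon - initial_epsilon)


# linear_epsilon(0) == 1.0, linear_epsilon(50000) == 0.55, then 0.1 from
# step 100000 onwards.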
Example #8
        worker_id = []
        # evaluators_id = []

        for i in range(self.cw_num):
            simulator = CartPoleEnv()
            worker_id.append(collecting_server.remote(simulator, self.model_server, self.memory, action_space=2))

        # ray.wait(collectors_id, len(collectors_id))
        #
        for j in range(self.ew_num):
            simulator = self.env
            worker_id.append(evaluation_server.remote(simulator, self.model_server))

        ray.wait(worker_id, len(worker_id))

        return ray.get(self.model_server.get_reuslts.remote())


####################

training_episodes, test_interval = 7000, 50
ENV_NAME = 'CartPole_distributed'

cartpole = CartPoleEnv()
# env_CartPole = gym.make(CartPoleEnv)
# print(cartpole.reset())

agent = distributed_DQN_agent(cartpole, collectors_num=8, evaluators_num=4)
result = agent.learn_and_evaluate()
plot_result(result, test_interval, ["batch_update with target_model"])

Example #9
hyperparams = {
    'epsilon_decay_steps': 7000,
    'final_epsilon': 0.1,
    'batch_size': 10,
    'update_steps': 5,
    'memory_size': 2000,
    'beta': 0.99,
    'model_replace_freq': 2000,
    'learning_rate': 0.0003,
    'use_target_model': True,
    'workers': (12, 4),
    'do_test': True,
    'initial_epsilon': 1,
    'steps': 0,
    'training_episodes': 7000,
    'test_interval': 50
}


start_time = time.time()
env = CartPoleEnv()
env.reset()
agent = distributed_RL_agent(env, hyperparams)
result = agent.learn_and_evaluate()
print(result)
print(time.time() - start_time)
# plot_result(result, test_interval, ["batch_update with target_model"])
print("Done!!")
Example #10
from custom_cartpole import CartPoleEnv

import gym

env = CartPoleEnv()
for i_episode in range(20):
    observation = env.reset()
    for t in range(100):
        env.render()
        print(observation)
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if done:
            print("Episode finished after {} timesteps".format(t + 1))
            break
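
The rollout above renders every frame and prints raw observations. A variant of the same loop, assuming the same gym-style reset/step API with a 4-tuple return, that skips rendering and reports the total reward per episode:

from custom_cartpole import CartPoleEnv

env = CartPoleEnv()
for i_episode in range(20):
    observation = env.reset()
    episode_reward = 0.0
    for t in range(100):
        action = env.action_space.sample()  # random policy
        observation, reward, done, info = env.step(action)
        episode_reward += reward
        if done:
            break
    print("Episode {} finished after {} timesteps, total reward {}".format(
        i_episode + 1, t + 1, episode_reward))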