Example #1
params.test_episodes = 10
params.goal = 0

# drop the decimal point from mu so it can be embedded in file names, e.g. mu=0.3 -> "03"
mu = str(params.mu).split(".")
mu = str(mu[0] + mu[1])
params.actor_model_dir = "../../logs/models/DDPG-original/{}/actor-mu{}/".format(
    str(params.env_name.split("-")[0]), mu)
params.critic_model_dir = "../../logs/models/DDPG-original/{}/critic-mu{}/".format(
    str(params.env_name.split("-")[0]), mu)

env = gym.make(params.env_name)
# env = Monitor(env, "./video/{}/".format(str(params.env_name.split("-")[0])), force=True)

tf.random.set_random_seed(params.seed)
random_process = None
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)

# set seed
env.seed(params.seed)

state = env.reset()
done = False
r, episode_reward = list(), 0
reward_forward, reward_ctrl, reward_contact, reward_survive = list(), list(), list(), list()
actions = list()

while not done:
    # deterministic policy
    action = agent.eval_predict(state)
    # scale for execution in the env (in DDPG, every action is clipped to [-1, 1] in agent.predict)
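    # --- hedged sketch: the listing is cut off here; the lines below are NOT the original code ---
    # assuming the classic Gym step API and that actions are rescaled by the env's action bound:
    action = action * env.action_space.high  # map the [-1, 1] policy output to the env's range
    next_state, reward, done, info = env.step(action)
    episode_reward += reward
    r.append(reward)
    actions.append(action)
    # MuJoCo-style envs expose reward components in `info` (assumption for this env):
    reward_forward.append(info.get("reward_forward", 0.0))
    state = next_state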
Example #2
mu = str(params.mu).split(".")
mu = str(mu[0] + mu[1])
params.log_dir = "../../logs/logs/DDPG_batchnorm-{}-seed{}/{}-mu{}".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.actor_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/actor-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.critic_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/critic-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.video_dir = "../../logs/video/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.plot_path = "../../logs/plots/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)

env = gym.make(params.env_name)
env = Monitor(env, params.video_dir)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
random_process = OrnsteinUhlenbeckProcess(size=env.action_space.shape[0],
                                          theta=0.15,
                                          mu=params.mu,
                                          sigma=params.sigma)
# random_process = GaussianNoise(mu=params.mu, sigma=params.sigma)
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)
train_DDPG_original(agent, env, replay_buffer, reward_buffer, summary_writer)
Example #3
now = datetime.datetime.now()

if params.debug_flg:
    params.log_dir = "../logs/logs/" + now.strftime("%Y%m%d-%H%M%S") + "-DDPG/"
    params.model_dir = "../logs/models/" + now.strftime("%Y%m%d-%H%M%S") + "-DDPG/"
else:
    params.log_dir = "../logs/logs/{}".format(params.env_name)
    params.model_dir = "../logs/models/{}".format(params.env_name)

env = gym.make(params.env_name)

# set seed
env.seed(params.seed)
tf.compat.v1.random.set_random_seed(params.seed)

agent = DDPG(Actor, Critic, env.action_space.shape[0], params)
replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)

init_state = env.reset()
# one dummy forward pass so that the networks build their weight matrices from the input shape
agent.predict(init_state)
gp_model, update = create_bayes_net()
optimiser = tf.compat.v1.train.AdamOptimizer()
num_sample = 100  # number of samples to draw

get_ready(agent.params)

global_timestep = tf.compat.v1.train.get_or_create_global_step()
time_buffer = deque(maxlen=agent.params.reward_buffer_ep)
log = logger(agent.params)
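The agent.predict(init_state) call above exists only to build the networks: with eager TensorFlow / Keras layers, weight matrices are created lazily on the first forward pass, so one dummy call fixes the input shape before the weights are read or copied. A minimal sketch of the same pattern with a toy Keras model (hypothetical, not the repository's Actor/Critic classes):

import numpy as np
import tensorflow as tf

toy_net = tf.keras.Sequential([tf.keras.layers.Dense(64, activation="relu"),
                               tf.keras.layers.Dense(4)])
print(toy_net.built)                                  # False: no weights exist yet
_ = toy_net(np.zeros((1, 8), dtype=np.float32))       # dummy forward pass fixes the input shape
print(toy_net.built, len(toy_net.trainable_weights))  # True 4 (kernel + bias per Dense layer)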
Example #4
params.plot_path = "./logs/plots/DDPG-seed{}/".format(params.seed)

env = make_grid_env(plot_path=params.plot_path)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
random_process = OrnsteinUhlenbeckProcess(size=env.action_space.shape[0],
                                          theta=0.15,
                                          mu=params.mu,
                                          sigma=params.sigma)
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)

get_ready(agent.params)

global_timestep = tf.compat.v1.train.get_or_create_global_step()
time_buffer = deque(maxlen=agent.params.reward_buffer_ep)
log = logger(agent.params)

traj = list()

with summary_writer.as_default():
    # keep all logging code inside this context so that summaries are recorded
    with tf.contrib.summary.always_record_summaries():

        for i in itertools.count():
            state = env.reset()
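Both this example and Example #2 draw exploration noise from an OrnsteinUhlenbeckProcess with the theta/mu/sigma values shown above. The repository's class is not reproduced in these listings; the following is only a minimal sketch of what such a process usually looks like (a discretised OU update), written as an assumption rather than the project's actual implementation:

import numpy as np

class OUProcessSketch:
    """Hypothetical stand-in for the OrnsteinUhlenbeckProcess used above."""

    def __init__(self, size, theta=0.15, mu=0.0, sigma=0.2, dt=1e-2):
        self.size, self.theta, self.mu, self.sigma, self.dt = size, theta, mu, sigma, dt
        self.reset()

    def reset(self):
        self.x = np.ones(self.size) * self.mu  # start the process at its mean

    def sample(self):
        # Euler discretisation of dx = theta * (mu - x) * dt + sigma * dW
        dx = (self.theta * (self.mu - self.x) * self.dt
              + self.sigma * np.sqrt(self.dt) * np.random.randn(self.size))
        self.x = self.x + dx
        return self.x

noise = OUProcessSketch(size=2, theta=0.15, mu=0.0, sigma=0.2)
action = np.zeros(2)                                  # stand-in for agent.predict(state)
action = np.clip(action + noise.sample(), -1.0, 1.0)  # typical usage: perturb and clip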
Example #5
now = datetime.now()

params.log_dir += "{}".format(params.env_name)
params.model_dir += "{}".format(params.env_name)

rospy.init_node("start_her")
task_and_robot_environment_name = rospy.get_param(
    '/fetch/task_and_robot_environment_name')
# register our task env with OpenAI Gym;
# we don't need to worry about the details of this call for now.
env = StartOpenAI_ROS_Environment(task_and_robot_environment_name)

# params.max_action = env.action_space.high[0]
# params.num_action = env.action_space.shape[0]

# TODO: temporary solution; check OpenAI's Fetch implementation
params.max_action = 0
params.num_action = 4

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

agent = DDPG(Actor, Critic, params.num_action, params)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.num_episodes)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
train_HER(agent, env, replay_buffer, reward_buffer, summary_writer)
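The two hard-coded values above are the temporary stand-ins flagged by the TODO. For a standard Gym Box action space, which the commented-out lines suggest this environment exposes, the bound and dimensionality would normally be read straight from the space, e.g. (a hedged sketch, not the repository's code):

# assuming env.action_space is a gym.spaces.Box, as in OpenAI's Fetch environments
params.max_action = float(env.action_space.high[0])  # upper bound of each action dimension
params.num_action = env.action_space.shape[0]        # action dimensionality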
Example #6
                    help="debug mode or not")
# note: argparse's type=bool treats any non-empty string (even "False") as True
parser.add_argument("--google_colab",
                    default=False,
                    type=bool,
                    help="if you are executing this on Google Colab")
params = parser.parse_args()
params.test_episodes = 10
params.goal = DDPG_ENV_LIST[params.env_name]

now = datetime.now()

if params.debug_flg:
    params.log_dir = "../../logs/logs/" + now.strftime(
        "%Y%m%d-%H%M%S") + "-DDPG/"
    params.model_dir = "../../logs/models/" + now.strftime(
        "%Y%m%d-%H%M%S") + "-DDPG/"
else:
    params.log_dir = "../../logs/logs/{}".format(params.env_name)
    params.model_dir = "../../logs/models/{}".format(params.env_name)

env = gym.make(params.env_name)
# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

agent = DDPG(Actor, Critic, env.action_space.shape[0], params)
replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
train_DDPG(agent, env, replay_buffer, reward_buffer, summary_writer)
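Each of these listings ends by handing the agent, environment, and buffers to a training function (train_DDPG, train_DDPG_original, or train_HER) whose body is not shown. As a rough orientation only, a DDPG-style loop over these pieces typically looks like the sketch below; method names such as agent.predict, agent.update, and replay_buffer.add/sample are assumptions based on how the objects are constructed above, not the repository's exact API:

def train_ddpg_sketch(agent, env, replay_buffer, reward_buffer,
                      num_episodes=100, batch_size=64):
    """Hedged sketch of a generic DDPG training loop; not the repository's train_DDPG."""
    for episode in range(num_episodes):
        state, done, episode_reward = env.reset(), False, 0.0
        while not done:
            action = agent.predict(state)                   # exploratory (noisy) action
            next_state, reward, done, _ = env.step(action)  # classic Gym step API
            replay_buffer.add(state, action, reward, next_state, done)
            if len(replay_buffer) > batch_size:
                batch = replay_buffer.sample(batch_size)
                agent.update(*batch)                        # critic, actor and target updates
            state = next_state
            episode_reward += reward
        reward_buffer.append(episode_reward)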