    def __init__(self, seed=0, env_name='MultiGoal', mdn_hidden_spec=None,
                 v_epochs=50, v_hdim=32, v_lr=1e-3,
                 p_epochs=30, p_hdim=32, p_lr=3e-4,
                 clip_range=0.2, alpha=40.0,
                 batch_size=128, episode_size=100, nupdates=150,
                 gamma=0.99, max_step=1000):
        # Fix the numpy random seed
        np.random.seed(seed)

        # Set session; allow_growth avoids grabbing all GPU memory up front
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

        env = MultiGoalEnv()

        # Get environment information
        obs_dim = env.observation_space.shape[0]
        act_dim = env.action_space.shape[0]

        # Set default network spec for the MDN hidden layers
        if mdn_hidden_spec is None:
            mdn_hidden_spec = [
                {'dim': 32, 'activation': tf.nn.tanh},
                {'dim': 32, 'activation': tf.nn.tanh},
            ]

        # Initialize TensorFlow graph
        # tf.reset_default_graph()

        # Build the value network
        value_func = network.Value(sess, obs_dim, epochs=v_epochs,
                                   hdim=v_hdim, lr=v_lr, seed=seed)

        # Seed TensorFlow's graph-level RNG before the policy variables
        # are created
        tf.set_random_seed(seed)

        # Build the policy network
        policy_func = policy.Policy(sess, obs_dim, act_dim, epochs=p_epochs,
                                    hdim=p_hdim, lr=p_lr,
                                    clip_range=clip_range, seed=seed,
                                    alpha=alpha)

        sess.run(tf.global_variables_initializer())

        # Store all variables on the instance
        self.seed = seed
        self.env_name = env_name
        self.mdn_hidden_spec = mdn_hidden_spec
        self.v_epochs = v_epochs
        self.v_hdim = v_hdim
        self.v_lr = v_lr
        self.p_epochs = p_epochs
        self.p_hdim = p_hdim
        self.p_lr = p_lr
        self.clip_range = clip_range
        self.alpha = alpha
        self.batch_size = batch_size
        self.episode_size = episode_size
        self.nupdates = nupdates
        self.gamma = gamma
        self.max_step = max_step
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.env = env
        self.value_func = value_func
        self.policy_func = policy_func
        self.sess = sess
        self.config = config
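
# tf.ConfigProto, tf.Session, tf.set_random_seed, and
# tf.global_variables_initializer above are TensorFlow 1.x graph-mode APIs.
# A minimal sketch for running this file under a TensorFlow 2.x runtime (an
# assumption; under TF 1.x the plain `import tensorflow as tf` works as-is):
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()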
            episode_length += 1
            # print('re:', reward)
            score += reward
            observation = observation_
        paths.append(path)
        # Normalize the episode return
        score = score / 200
        score_history.append(score)
        avg_score = np.mean(score_history[-20:])
        env.render_rollouts(paths, fout="test_%d.png" % i)


if __name__ == '__main__':
    env = MultiGoalEnv()
    # print(env.observation_space.shape)
    # print(env.action_space.shape)
    agent = Agent(input_dims=env.observation_space.shape, env=env,
                  n_actions=env.action_space.shape[0])
    n_games = 500
    # Uncomment the line below (and run `mkdir tmp && mkdir video` first)
    # if you want to record video of the agent playing the game.
    # env = wrappers.Monitor(env, 'tmp/video',
    #                        video_callable=lambda episode_id: True, force=True)
    filename = 'inverted_pendulum.png'
    figure_file = 'plots/' + filename
    # print(env.action_space.high)
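
# The plotting call itself falls outside this excerpt. Below is a sketch of
# a learning-curve helper that could consume `score_history` and the
# `figure_file` defined above; the helper's name and the 100-episode
# smoothing window are assumptions, not part of the original code.
import matplotlib.pyplot as plt
import numpy as np


def plot_learning_curve(x, scores, figure_file):
    # Running average over (up to) the previous 100 episodes
    running_avg = np.zeros(len(scores))
    for i in range(len(running_avg)):
        running_avg[i] = np.mean(scores[max(0, i - 100):(i + 1)])
    plt.plot(x, running_avg)
    plt.title('Running average of previous 100 scores')
    plt.savefig(figure_file)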
import gym
import numpy as np
import pybulletgym
from SQL_torch import Agent
from gym import wrappers
import math
from multigoal import MultiGoalEnv
import torch as T
from plotter import QFPolicyPlotter
from copy import deepcopy

if __name__ == "__main__":
    env = MultiGoalEnv()
    agent = Agent(env, hidden_dim=[256, 256], replay_size=int(1e6),
                  pi_lr=1e-3, q_lr=1e-3, batch_size=100,
                  n_particles=16, gamma=0.99, polyak=0.995)

    epochs = 100
    update_after = 0
    max_ep_len = 30
    steps_per_epoch = 400

    # Prepare for interaction with environment
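    # The file is truncated here. Below is a minimal sketch of the standard
    # off-policy interaction loop these hyperparameters suggest; the Agent
    # methods used (`get_action`, `replay_buffer.store`, `update`) are
    # assumptions about the SQL_torch interface, which is not shown here.
    total_steps = steps_per_epoch * epochs
    o, ep_len = env.reset(), 0
    for t in range(total_steps):
        a = agent.get_action(o)                    # assumed API
        o2, r, d, _ = env.step(a)
        ep_len += 1
        # Ignore the artificial "done" signal produced by the time limit
        d = False if ep_len == max_ep_len else d
        agent.replay_buffer.store(o, a, r, o2, d)  # assumed API
        o = o2
        if d or ep_len == max_ep_len:
            o, ep_len = env.reset(), 0
        if t >= update_after:
            agent.update()                         # assumed API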