def test_MpiAdam():
    np.random.seed(0)
    tf.set_random_seed(0)

    a = tf.Variable(np.random.randn(3).astype('float32'))
    b = tf.Variable(np.random.randn(2, 5).astype('float32'))
    loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))

    stepsize = 1e-2
    update_op = tf.train.AdamOptimizer(stepsize).minimize(loss)
    do_update = U.function([], loss, updates=[update_op])

    # First pass: optimize the loss with TF's built-in Adam as a reference.
    tf.get_default_session().run(tf.global_variables_initializer())
    for i in range(10):
        print(i, do_update())

    # Second pass: re-seed, re-initialize, and optimize the same loss with MpiAdam.
    tf.set_random_seed(0)
    tf.get_default_session().run(tf.global_variables_initializer())

    var_list = [a, b]
    lossandgrad = U.function([], [loss, U.flatgrad(loss, var_list)], updates=[update_op])
    adam = MpiAdam(var_list)

    for i in range(10):
        l, g = lossandgrad()
        adam.update(g, stepsize)
        print(i, l)
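# A minimal, hypothetical driver for test_MpiAdam. It assumes the same module
# context as the test above (numpy, TF1, U = baselines.common.tf_util, MpiAdam)
# and only adds the default session that U.function and get_default_session() require.
if __name__ == '__main__':
    with tf.Session():
        test_MpiAdam()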
def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
    np.random.seed(0)
    np_random.seed(0)

    env = DummyVecEnv([env_fn])

    with tf.Graph().as_default(), \
            tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
        tf.set_random_seed(0)

        model = learn_fn(env)

        sum_rew = 0
        done = True

        for i in range(n_trials):
            if done:
                obs = env.reset()
                state = model.initial_state

            # Recurrent policies carry their state (and episode-start mask) between steps.
            if state is not None:
                a, v, state, _ = model.step(obs, S=state, M=[False])
            else:
                a, v, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(a)
            sum_rew += float(rew)

        print("Reward in {} trials is {}".format(n_trials, sum_rew))
        assert sum_rew > min_reward_fraction * n_trials, \
            'sum of rewards {} is less than {} of the total number of trials {}'.format(
                sum_rew, min_reward_fraction, n_trials)
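# Hypothetical invocation sketch for simple_test. The environment name, the ppo2
# settings, and the 0.9 threshold are assumptions for illustration, not the
# repository's actual test fixtures.
def _cartpole_env_fn():
    import gym
    return gym.make('CartPole-v0')

def _ppo2_learn_fn(env):
    from baselines.ppo2 import ppo2
    return ppo2.learn(network='mlp', env=env, total_timesteps=30000)

simple_test(_cartpole_env_fn, _ppo2_learn_fn, min_reward_fraction=0.9)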
def __init__(self, hyperparams, dO, dU):
    config = copy.deepcopy(POLICY_OPT_TF)
    config.update(hyperparams)

    PolicyOpt.__init__(self, config, dO, dU)

    tf.set_random_seed(self._hyperparams['random_seed'])

    self.tf_iter = 0
    self.batch_size = self._hyperparams['batch_size']
    self.device_string = "/cpu:0"
    if self._hyperparams['use_gpu'] == 1:
        self.gpu_device = self._hyperparams['gpu_id']
        self.device_string = "/gpu:" + str(self.gpu_device)
    self.act_op = None  # mu_hat
    self.feat_op = None  # features
    self.loss_scalar = None
    self.obs_tensor = None
    self.precision_tensor = None
    self.action_tensor = None  # mu true
    self.solver = None
    self.feat_vals = None
    self.init_network()
    self.init_solver()
    self.var = self._hyperparams['init_var'] * np.ones(dU)
    self.sess = tf.Session()
    self.policy = TfPolicy(dU, self.obs_tensor, self.act_op, self.feat_op,
                           np.zeros(dU), self.sess, self.device_string,
                           copy_param_scope=self._hyperparams['copy_param_scope'])
    # List of indices for state (vector) data and image (tensor) data in observation.
    self.x_idx, self.img_idx, i = [], [], 0
    if 'obs_image_data' not in self._hyperparams['network_params']:
        self._hyperparams['network_params'].update({'obs_image_data': []})
    for sensor in self._hyperparams['network_params']['obs_include']:
        dim = self._hyperparams['network_params']['sensor_dims'][sensor]
        if sensor in self._hyperparams['network_params']['obs_image_data']:
            self.img_idx = self.img_idx + list(range(i, i + dim))
        else:
            self.x_idx = self.x_idx + list(range(i, i + dim))
        i += dim

    # Pre-TF-1.0 initializer; newer TensorFlow uses tf.global_variables_initializer().
    init_op = tf.initialize_all_variables()
    self.sess.run(init_op)
def set_global_seeds(i):
    try:
        import MPI
        rank = MPI.COMM_WORLD.Get_rank()
    except ImportError:
        rank = 0
    # Give each MPI worker its own seed; passing None leaves the RNGs unseeded.
    myseed = i + 1000 * rank if i is not None else None
    try:
        import tensorflow as tf
        tf.set_random_seed(myseed)
    except ImportError:
        pass
    np.random.seed(myseed)
    random.seed(myseed)
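# Hypothetical usage sketch for set_global_seeds: seed everything once at startup,
# before any environments or graphs are built. The seed value and environment name
# are assumptions; a gym environment still needs its own env.seed() call.
import gym

set_global_seeds(0)              # each MPI rank gets 0 + 1000 * rank
env = gym.make('CartPole-v1')
env.seed(0)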
def main(args):
    for ite in range(int(args['trial_num'])):
        print('Trial Number:', ite)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)

        with tf.Session(config=config) as sess:
            if args['change_seed']:
                # rand_seed = 10 * ite
                rand_seed = np.random.randint(1, 1000, size=1)
            else:
                rand_seed = 0

            env = gym.make(args['env'])
            np.random.seed(int(args['random_seed']) + int(rand_seed))
            tf.set_random_seed(int(args['random_seed']) + int(rand_seed))
            env.seed(int(args['random_seed']) + int(rand_seed))

            env_test = gym.make(args['env'])
            env_test.seed(int(args['random_seed']) + int(rand_seed))

            state_dim = env.observation_space.shape[0]
            action_dim = env.action_space.shape[0]
            print('action_space.shape', env.action_space.shape)
            print('observation_space.shape', env.observation_space.shape)
            action_bound = env.action_space.high
            # Ensure action bound is symmetric
            # print(env.action_space.high)
            # print(env.action_space.low)
            assert (env.action_space.high[0] == -env.action_space.low[0])

            agent = TD3(sess, env, state_dim, action_dim, action_bound,
                        int(args['minibatch_size']),
                        tau=float(args['tau']),
                        actor_lr=float(args['actor_lr']),
                        critic_lr=float(args['critic_lr']),
                        gamma=float(args['gamma']),
                        hidden_dim=np.asarray(args['hidden_dim']))
            # actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

            if args['use_gym_monitor']:
                if not args['render_env']:
                    env = wrappers.Monitor(env, args['monitor_dir'],
                                           video_callable=False, force=True)
                else:
                    env = wrappers.Monitor(
                        env, args['monitor_dir'],
                        video_callable=lambda episode_id: episode_id % 50 == 0,
                        force=True)

            step_R_i = train(sess, env, env_test, args, agent)

            result_path = "./results/trials/"
            try:
                import pathlib
                pathlib.Path(result_path).mkdir(parents=True, exist_ok=True)
            except:
                print("A result directory does not exist and cannot be created. "
                      "The trial results are not saved")

            result_filename = (args['result_file'] + '_' + args['env'] +
                               '_trial_idx_' + str(int(args['trial_idx'])) + '.txt')
            if args['overwrite_result'] and ite == 0:
                np.savetxt(result_filename, np.asarray(step_R_i))
            else:
                data = np.loadtxt(result_filename, dtype=float)
                data_new = np.vstack((data, np.asarray(step_R_i)))
                np.savetxt(result_filename, data_new)

            if args['use_gym_monitor']:
                env.monitor.close()
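# Hypothetical args dictionary for the training main() above, listing the keys it
# reads directly; every value is an assumption chosen only for illustration, and
# train() may consume additional keys not shown here.
example_args = {
    'env': 'Pendulum-v0',
    'trial_num': 1,
    'trial_idx': 0,
    'random_seed': 0,
    'change_seed': False,
    'minibatch_size': 100,
    'tau': 0.005,
    'actor_lr': 1e-3,
    'critic_lr': 1e-3,
    'gamma': 0.99,
    'hidden_dim': [400, 300],
    'use_gym_monitor': False,
    'render_env': False,
    'monitor_dir': './results/gym_monitor',
    'result_file': 'step_rewards',
    'overwrite_result': True,
}
main(example_args)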
def main(args):
    result_name = 'TD3_' + args['env'] + '_trial_idx_' + str(int(args['trial_idx']))

    for ite in range(int(args['trial_num'])):
        print('Trial Number:', ite)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)

        with tf.Session(config=config) as sess:
            if args['change_seed']:
                rand_seed = 10 * ite
            else:
                rand_seed = 0

            np.random.seed(int(args['random_seed']) + int(rand_seed))
            tf.set_random_seed(int(args['random_seed']) + int(rand_seed))

            env = gym.make(args['env'])
            env.seed(int(args['random_seed']) + int(rand_seed))

            if args['save_video']:
                try:
                    import pathlib
                    pathlib.Path("./Video/" + args['env']).mkdir(parents=True, exist_ok=True)
                    video_relative_path = "./Video/" + args['env'] + "/"
                    # To save video of the first episode
                    env = gym.wrappers.Monitor(
                        env, video_relative_path,
                        video_callable=lambda episode_id: episode_id == 0,
                        force=True)
                    # To save video of every episode
                    # env_test = gym.wrappers.Monitor(env_test, video_relative_path,
                    #     video_callable=lambda episode_id: episode_id % 1 == 0, force=True)
                except:
                    print("Cannot create video directories. Video will not be saved.")

            state_dim = env.observation_space.shape[0]
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # Ensure action bound is symmetric
            assert (env.action_space.high[0] == -env.action_space.low[0])

            if args['method_name'] == 'TD3':
                from TD3_keras_agent import TD3
                agent = TD3(sess, env, state_dim, action_dim, action_bound,
                            int(args['minibatch_size']),
                            tau=float(args['tau']),
                            actor_lr=float(args['actor_lr']),
                            critic_lr=float(args['critic_lr']),
                            gamma=float(args['gamma']),
                            hidden_dim=np.asarray(args['hidden_dim']))

            agent.load_model(iteration=int(args['load_model_iter']), expname=result_name)

            # if args['use_gym_monitor']:
            #     if not args['render_env']:
            #         env = wrappers.Monitor(env, args['monitor_dir'], video_callable=False, force=True)
            #     else:
            #         env = wrappers.Monitor(env, args['monitor_dir'],
            #             video_callable=lambda episode_id: episode_id == 0, force=True)

            test(sess, env, args, agent, result_name)