def finalLayer(self, y, n_iters=1, learner_size=200):
    """Train a sigmoid + softmax output stage on ``self.X`` and append it.

    A ``Layers.SigmoidLayer`` (constructed with ``Noise.GaussianNoise(0.1)``)
    and a ``Layers.SoftmaxLayer`` are trained together by a fresh ``Trainer``
    against ``y``; the two layers returned by the trainer are then appended,
    sigmoid first, to ``self.Layers``.

    Args:
        y: Training targets; ``y.shape[1]`` is passed as the second argument
            of ``SoftmaxLayer`` (presumably the number of outputs).
        n_iters: Passed through as the last argument of ``Trainer.train``.
        learner_size: Second argument of ``SigmoidLayer`` and first argument
            of ``SoftmaxLayer`` (presumably the hidden width shared by both).
    """
    # The parenthesised single-argument form prints the same text whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    print("Final Layer")

    sigmoid = Layers.SigmoidLayer(
        self.X.shape[1], learner_size, noise=Noise.GaussianNoise(0.1))
    softmax = Layers.SoftmaxLayer(learner_size, y.shape[1])

    trainer = Trainer()
    # ``train`` hands back the layers in the order they were given.
    sigmoid, softmax = trainer.train([sigmoid, softmax], self.X, y, n_iters)

    self.Layers.append(sigmoid)
    self.Layers.append(softmax)
if done: print(i, "---", ep_reward) break if i % 20 == 0: pass #video_saver.release() if __name__ == '__main__': batch_size = 64 tf_config = tf.ConfigProto() #tf_config.gpu_options.per_process_gpu_memory_fraction = 0.6 tf_config.gpu_options.allow_growth = True saver = tf.train.Saver() env = gym.make('InvertedPendulum-v2') #print(env.action_space.high) with tf.Session(config=tf_config) as sess: actor = Actor.Actor(sess, [4], 1, 0.0001, 0.001, batch_size) critic = Critic.Critic(sess, [4], 1, 0.001, 0.001, 0.99, actor.get_num_trainable_vars()) actor_noise = Noise.GaussianNoise() train_feature(sess, env, actor, critic, actor_noise, batch_size, saver)
# Build the actor, critic and exploration-noise objects inside one TF session,
# then record the experiment settings to stdout and experiment_detail.txt.
with tf.Session(config=tf_config) as sess:
    #state_dim : 1d, action_spec : scalar
    # Constructor arguments shared by both actor variants.
    shared_actor_args = (sess, state_dim, action_dim, actor_lr, tau, batch_size)
    if actor_type == "rnn":
        # The recurrent actor takes the number of actions as one extra
        # trailing argument.
        actor = RNNActor.Actor(*(shared_actor_args + (num_of_action,)))
    elif actor_type == "basic":
        actor = Actor.Actor(*shared_actor_args)

    critic = Critic.Critic(
        sess, state_dim, action_dim, critic_lr, tau, gamma,
        actor.get_num_trainable_vars(), critic_reg_weight,
    )

    if noise_type == "ou":
        # Zero mean vector of length action_dim / num_of_action.
        ou_mean = np.zeros([int(action_dim / num_of_action)])
        actor_noise = Noise.OrnsteinUhlenbeckActionNoise(mu=ou_mean, sigma=sigma)
    elif noise_type == "gaussian":
        actor_noise = Noise.GaussianNoise(action_dim=action_dim, sigma=sigma)

    exp_detail = utils.experiment_detail_saver(
        domain_name, task_name, step_size,
        actor_lr, critic_lr, tau, gamma, sigma, batch_size, critic_reg_weight)
    print(exp_detail)

    # The same message goes to the detail file (newline-terminated) and stdout.
    action_count_msg = "num of action : " + str(num_of_action)
    utils.append_file_writer(video_dir, "experiment_detail.txt",
                             action_count_msg + "\n")
    print(action_count_msg)
# Hyper-parameters for this run.
critic_lr = 1e-3
tau = 5e-3
gamma = 0.99
sigma = 0.2
critic_reg_weight = 0.0

# Exploration noise selector; only the two kinds handled below are accepted.
noise_type = "ou"
assert noise_type in ["ou", "gaussian"]

with tf.Session(config=tf_config) as sess:
    #state_dim : 1d, action_spec : scalar
    # Read the action dimension once and reuse it for the actor, critic and
    # noise process instead of re-querying the environment each time.
    action_dim = env.action_spec().shape[0]

    actor = Actor.Actor(sess, state_dim, action_dim, actor_lr, tau, batch_size)
    critic = Critic.Critic(sess, state_dim, action_dim, critic_lr, tau, gamma,
                           actor.get_num_trainable_vars(), critic_reg_weight)

    if noise_type == "gaussian":
        actor_noise = Noise.GaussianNoise(action_dim=action_dim, sigma=sigma)
    elif noise_type == "ou":
        actor_noise = Noise.OrnsteinUhlenbeckActionNoise(
            mu=np.zeros([action_dim]), sigma=sigma)

    exp_detail = utils.experiment_detail_saver(
        domain_name, task_name, step_size,
        actor_lr, critic_lr, tau, gamma, sigma, batch_size, critic_reg_weight)
    print(exp_detail)

    # Append the critic variant and the noise kind to the experiment log.
    utils.append_file_writer(video_dir, "experiment_detail.txt",
                             "Critic origin type : " + critic.critic_origin_type + "\n")
    utils.append_file_writer(video_dir, "experiment_detail.txt",
                             "Noise type : " + noise_type + "\n")