Example #1
    def __init__(self, env, sess, low_action_bound_list,
                 high_action_bound_list):
        self.env = env
        self.sess = sess
        self.low_action_bound_list = low_action_bound_list  # depends on the env
        self.high_action_bound_list = high_action_bound_list
        self.action_range_bound = [
            hi - lo for hi, lo in zip(self.high_action_bound_list,
                                      self.low_action_bound_list)
        ]
        self.learning_rate = 0.0001  # TODO: move these to configs
        self.epsilon = 1.0
        self.epsilon_min = 0.1
        self.epsilon_decay = 1e-6
        self.gamma = 0.99
        self.tau = 0.001
        self.buffer_size = 1000000
        self.batch_size = 128
        self.theta = 0.15
        self.ou = 0
        self.sigma = 0.3

        self.state_dim = self.env.observation_space.shape[0]
        self.action_dim = len(self.low_action_bound_list)  # TODO: take from self.env.action_space or pass in explicitly
        self.continuous_action_space = True

        # Initialize replay buffer
        self.replay_buffer = ReplayBuffer(self.buffer_size)

        # Creating ACTOR model
        actor_ = Actor(self.state_dim, self.action_dim, self.learning_rate)
        self.actor_state_input, self.actor_model = actor_.create_actor_model()
        _, self.target_actor_model = actor_.create_actor_model()

        self.actor_critic_grad = tf.placeholder(tf.float32,
                                                [None, self.action_dim])

        actor_model_weights = self.actor_model.trainable_weights
        # dQ/da (fed in via actor_critic_grad at train time) is back-propagated
        # through the actor; the minus sign makes Adam's descent step an ascent on Q.
        self.actor_grads = tf.gradients(self.actor_model.output,
                                        actor_model_weights,
                                        -self.actor_critic_grad)

        grads = zip(self.actor_grads, actor_model_weights)
        self.optimize = tf.train.AdamOptimizer(
            self.learning_rate).apply_gradients(grads)

        # Creating CRITIC model
        critic_ = Critic(self.state_dim, self.action_dim, self.learning_rate)
        (self.critic_state_input, self.critic_action_input,
         self.critic_model) = critic_.create_critic_model()
        _, _, self.target_critic_model = critic_.create_critic_model()

        self.critic_grads = tf.gradients(self.critic_model.output,
                                         self.critic_action_input)

        self.noise = OrnsteinUhlenbeckProcess(size=self.action_dim)
        self.noise.reset()

        self.sess.run(tf.global_variables_initializer())  # initialize_all_variables() is deprecated
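
The snippet above instantiates an OrnsteinUhlenbeckProcess for exploration noise but does not show its definition. Below is a minimal sketch of a compatible class, assuming the theta = 0.15 / sigma = 0.3 defaults set in the constructor; only the `size` constructor argument and `reset()` are visible in the snippet, so the `mu`/`dt` parameters and the `sample()` method name are assumptions.

import numpy as np

class OrnsteinUhlenbeckProcess:
    """Temporally correlated noise: dx = theta*(mu - x)*dt + sigma*sqrt(dt)*N(0, I)."""

    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.3, dt=1.0):
        self.size = size
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self.reset()

    def reset(self):
        # Restart the process at its long-run mean (e.g. at episode start).
        self.x = np.full(self.size, self.mu, dtype=np.float64)

    def sample(self):
        # One Euler-Maruyama step; successive samples are correlated,
        # which gives smoother exploration than i.i.d. Gaussian noise.
        dx = (self.theta * (self.mu - self.x) * self.dt
              + self.sigma * np.sqrt(self.dt) * np.random.randn(self.size))
        self.x = self.x + dx
        return self.x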
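
For context, here is how the tensors wired up above are typically used in a DDPG actor update. This is a sketch of a method on the same agent class, not part of the original example; the `_train_actor` name and the `predict` call are assumptions, while `critic_grads`, `optimize`, and the placeholders come from the constructor above.

    def _train_actor(self, states):
        # 1) Actions the current policy would take in these states.
        predicted_actions = self.actor_model.predict(states)
        # 2) dQ/da from the critic, evaluated at those actions.
        grads = self.sess.run(self.critic_grads,
                              feed_dict={
                                  self.critic_state_input: states,
                                  self.critic_action_input: predicted_actions
                              })[0]
        # 3) Push dQ/da through the actor; the constructor negated this
        #    gradient, so Adam's descent step performs gradient ascent on Q.
        self.sess.run(self.optimize,
                      feed_dict={
                          self.actor_state_input: states,
                          self.actor_critic_grad: grads
                      })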
Example #2
    def __init__(self, env, sess, low_action_bound_list,
                 high_action_bound_list):
        self.env = env
        self.sess = sess
        self.low_action_bound_list = low_action_bound_list  # depends on the env
        self.high_action_bound_list = high_action_bound_list
        self.action_range_bound = [
            hi - lo for hi, lo in zip(self.high_action_bound_list,
                                      self.low_action_bound_list)
        ]
        self.learning_rate = 0.0001
        self.exploration_noise = 0.1
        self.gamma = 0.90
        self.tau = 0.01
        self.buffer_size = 10000
        self.batch_size = 128
        self.policy_noise = 0.1
        self.noise_clip = 0.05
        self.exploration_episodes = 10
        # self.policy_freq = 2

        self.state_dim = self.env.observation_space.shape[0]
        self.action_dim = len(self.low_action_bound_list)  # TODO: take from self.env.action_space or pass in explicitly
        self.continuous_action_space = True

        # Initialize replay buffer
        self.replay_buffer = ReplayBuffer(self.buffer_size)

        # Creating ACTOR model
        actor_ = Actor(self.state_dim, self.action_dim, self.learning_rate)
        self.actor_state_input, self.actor_model = actor_.create_actor_model()
        _, self.target_actor_model = actor_.create_actor_model()

        self.actor_critic_grad = tf.placeholder(tf.float32,
                                                [None, self.action_dim])

        actor_model_weights = self.actor_model.trainable_weights
        self.actor_grads = tf.gradients(self.actor_model.output,
                                        actor_model_weights,
                                        -self.actor_critic_grad)

        grads = zip(self.actor_grads, actor_model_weights)
        self.optimize = tf.train.AdamOptimizer(
            self.learning_rate).apply_gradients(grads)

        # Creating FIRST CRITIC model, this is the one we train/optimize against
        critic_ = Critic(self.state_dim, self.action_dim, self.learning_rate)
        (self.critic_state_input, self.critic_action_input,
         self.critic_model) = critic_.create_critic_model()
        # loss='' would fail at compile time; the critic regresses Q-value
        # targets, so mean squared error is the standard choice.
        self.critic_model.compile(optimizer=Adam(lr=critic_.learning_rate),
                                  loss='mse')

        _, _, self.target_critic_model = critic_.create_critic_model()
        self.target_critic_model.compile(
            optimizer=Adam(lr=critic_.learning_rate), loss='mse')

        # dQ/da taken from the first critic head (the one trained against,
        # per the comment above).
        self.critic_grads = tf.gradients(self.critic_model.output[0],
                                         self.critic_action_input)

        self.sess.run(tf.global_variables_initializer())  # initialize_all_variables() is deprecated
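
Example #2 defines TD3-style hyperparameters (`policy_noise`, `noise_clip`, `tau`) without showing where they are used. Below is a minimal sketch of the two updates they usually drive, target policy smoothing and the Polyak (soft) target update; the helper names are hypothetical and the methods are assumed to live on the same agent class.

import numpy as np

    def _smoothed_target_actions(self, next_states):
        # TD3 target policy smoothing: add clipped Gaussian noise to the
        # target actor's actions before the target critic evaluates them,
        # then clip back into the valid action range.
        actions = self.target_actor_model.predict(next_states)
        noise = np.clip(
            np.random.normal(0.0, self.policy_noise, size=actions.shape),
            -self.noise_clip, self.noise_clip)
        return np.clip(actions + noise,
                       self.low_action_bound_list, self.high_action_bound_list)

    def _soft_update(self, model, target_model):
        # Polyak averaging: target <- tau * online + (1 - tau) * target.
        weights = model.get_weights()
        target_weights = target_model.get_weights()
        target_model.set_weights([
            self.tau * w + (1.0 - self.tau) * tw
            for w, tw in zip(weights, target_weights)
        ])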