Example #1
from collections import deque
import random

import numpy as np
import tensorflow as tf  # TF 1.x-style session API (GPUOptions, ConfigProto, Session)
from keras import backend as K

# Actor and Critic are the project's own network wrapper classes (not shown here).


class DDPGAgent:
    def __init__(self,
                 state_size=28,
                 action_size=2,
                 gamma=0.9,
                 learning_rate_actor=0.0001,
                 learning_rate_critic=0.01,
                 tau=0.001,
                 action_max=[1000, 2],
                 batch_size=32):
        self.state_size = state_size
        self.action_size = action_size
        self.action_max = action_max
        self.batch_size = batch_size
        self.memory = deque(maxlen=5000)
        self.gamma = gamma  # discount rate
        self.learning_rate_actor = learning_rate_actor  # actor learning rate
        self.learning_rate_critic = learning_rate_critic  # critic learning rate
        self.tau = tau  # target transfer factor
        self.gpu_options = tf.GPUOptions()
        self.config = tf.ConfigProto(gpu_options=self.gpu_options)
        self.config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=self.config)
        K.set_session(self.sess)
        self.actor = Actor(state_size=self.state_size,
                           action_size=self.action_size,
                           learning_rate=self.learning_rate_actor,
                           tau=self.tau,
                           sess=self.sess,
                           batch_size=self.batch_size,
                           action_max=self.action_max)
        self.critic = Critic(state_size=self.state_size,
                             action_size=self.action_size,
                             learning_rate=self.learning_rate_critic,
                             gamma=self.gamma,
                             tau=self.tau,
                             sess=self.sess,
                             batch_size=self.batch_size)
        self.grad_avg = 0
        self.grad_a = []
        self.critic_loss_a = []
        #self.critic_2 = Critic_2(self.state_size, self.action_size, self.learning_rate_critic, self.gamma, self.tau, self.sess)

    def policy_action(self, state):
        '''
        Predict an action for the given state with the online actor network.
        :param state: current environment state
        :return: predicted action
        '''
        return self.actor.predict(state)[0]

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        states = np.asarray([e[0] for e in minibatch]).reshape(
            batch_size, self.state_size)
        actions = np.asarray([e[1] for e in minibatch]).reshape(
            batch_size, self.action_size)
        rewards = np.asarray([e[2] for e in minibatch]).reshape(batch_size, 1)
        next_states = np.asarray([e[3] for e in minibatch]).reshape(
            batch_size, self.state_size)

        # Bellman target from the target networks: r + gamma * Q'(s', mu'(s'))
        target_actions = self.actor.target_predict(next_states)
        Qvals = self.critic.target_predict(next_states, target_actions)
        Q_primes = rewards + self.gamma * Qvals
        self.update_models(states, actions, Q_primes)

    def update_models(self, states, actions, critic_target):
        '''
        Update actor and critic networks from sampled experience
        :param states:
        :param actions:
        :param critic_target:
        :return:
        '''
        # Train the critic on the Bellman targets
        loss = self.critic.train_on_batch(states, actions, critic_target)
        self.critic_loss_a.append(loss)
        # loss = np.sum(-np.log10(loss), axis=0)

        # Q-value gradient w.r.t. the actions of the current policy
        act = self.actor.predict(states)
        grads = self.critic.gradients(states, act)

        # Track gradient statistics for diagnostics
        self.grad_avg += np.sum(np.log10(np.absolute(grads)),
                                axis=0) / self.batch_size
        self.grad_a = np.append(self.grad_a,
                                np.sum(np.absolute(grads), axis=0) /
                                self.batch_size,
                                axis=0)
        # print('grad_a:', self.grad_a)

        # Train the actor along the critic's action gradients
        self.actor.train_2(states, grads.reshape((-1, self.action_size)))

        # Soft-update the target networks at rate tau
        self.actor.transfer_to_actor_model()
        self.critic.transfer_to_critic_model()

    def remember(self, state, action, reward, next_state):
        self.memory.append((state, action, reward, next_state))

    def save_weights(self, directory, params):
        path_actor = directory + 'Weights' + params + '_LR{}'.format(
            self.learning_rate_actor)
        path_critic = directory + 'Weights' + params + '_LR{}'.format(
            self.learning_rate_critic)
        self.actor.save(path_actor)
        self.critic.save(path_critic)

    def load_weights(self, path_actor, path_critic):
        self.actor.load_weights(path_actor)
        self.critic.load_weights(path_critic)

    def load_model(self, path_actor, path_critic):
        self.actor.model.load_model(path_actor)
        self.critic.model.load_model(path_critic)
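
The tau parameter above is the soft target-update rate: after every training
step the agent calls transfer_to_actor_model() and transfer_to_critic_model(),
which in standard DDPG blend the online weights into the target weights as
theta_target <- tau * theta + (1 - tau) * theta_target. Since the Actor and
Critic classes are not shown here, the snippet below is only a minimal,
self-contained NumPy sketch of that update rule, assuming the project follows
the usual DDPG formulation; the weight arrays are toy values for illustration.

import numpy as np

def soft_update(online_weights, target_weights, tau=0.001):
    """Polyak-average each layer: tau * online + (1 - tau) * target."""
    return [tau * w + (1.0 - tau) * t
            for w, t in zip(online_weights, target_weights)]

# Toy weights standing in for the real networks' layer arrays.
online = [np.ones((2, 2)), np.zeros(2)]
target = [np.zeros((2, 2)), np.ones(2)]
target = soft_update(online, target, tau=0.001)
print(target[0][0, 0])  # 0.001: the target drifts slowly toward the online net
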
Example #2
File: PPO.py Project: war3gu/gykRL
import gym
import numpy as np
import tensorflow as tf

# Actor, Critic, ReplayMemory, and build_summaries are assumed to be the
# project's own helpers (defined elsewhere in PPO.py or its modules).


def train():

    env = gym.make('LunarLander-v2')

    state = env.reset()

    actor = Actor(env.action_space, env.observation_space)

    critic = Critic(env.action_space, env.observation_space)

    actor.load()
    critic.load()

    replayMemory = ReplayMemory()

    summary_ops, summary_vars = build_summaries()

    sess = tf.Session()  # single session reused for summary evaluation
    writer = tf.summary.FileWriter("./log", sess.graph)

    episode_reward = 0

    step = 1

    while True:

        #env.render()

        state1 = state[np.newaxis, :]

        action, action_matrix, prob = actor.predict(state1)

        next_state, reward, done, info = env.step(action)

        replayMemory.add(state, action_matrix, reward, done, next_state, prob)

        state = next_state

        episode_reward += reward

        #train
        if replayMemory.size() % 128 == 0 or done:

            state_b, action_matrix_b, reward_b, done_b, next_state_b, prob_b = \
                replayMemory.miniAll()

            reward_b = reward_b[:, np.newaxis]

            c_pre = critic.predict(next_state_b)  # V(s') from the critic

            # One-step target with a discount factor of 0.7
            state_pre_value = reward_b + c_pre * 0.7

            state_value = critic.predict(state_b)  # V(s), passed to the actor update

            count = 5000 // step

            if count > 500:
                count = 500

            if count < 1:
                count = 1

            count = 10  # fixed value; overrides the step-based schedule above

            for _ in range(count):
                critic.train(state_b, state_pre_value)

            for _ in range(count):
                actor.train(state_b, state_value, state_pre_value,
                            action_matrix_b, prob_b)

            replayMemory.clear()
        ########################

        if done:

            summary_str = sess.run(
                summary_ops, feed_dict={summary_vars[0]: episode_reward})
            writer.add_summary(summary_str, step)
            writer.flush()

            ##print("step = ", step, "episode_reward = ", episode_reward)

            state = env.reset()

            episode_reward = 0

            step += 1

            if step % 25 == 0:
                actor.save()
                critic.save()
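
In the update block above the critic is regressed toward a one-step target,
reward_b + 0.7 * c_pre, and the actor receives both state_value and that
target, presumably to form an advantage estimate (target minus V(s)); note
that done_b is returned by miniAll() but not used to mask terminal next-state
values. The snippet below is a minimal NumPy sketch of that target and
advantage computation, assuming the standard advantage definition
A = target - V(s); the reward and value numbers are toy values for
illustration only.

import numpy as np

gamma = 0.7  # matches the constant used in the training loop above

# Toy batch: rewards, critic estimates V(s') and V(s) (illustrative values).
reward_b = np.array([[1.0], [0.0], [-1.0]])
v_next = np.array([[0.5], [0.2], [0.0]])
v_curr = np.array([[0.4], [0.3], [-0.2]])

td_target = reward_b + gamma * v_next  # what critic.train regresses toward
advantage = td_target - v_curr         # signal an actor update would use

print(td_target.ravel())  # -> 1.35, 0.14, -1.0
print(advantage.ravel())  # -> 0.95, -0.16, -0.8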