Python DQNAgent.train_model Examples

Programming Language: Python

Namespace/Package Name: dqn

Class/Type: DQNAgent

Method/Function: train_model

Examples at hotexamples.com: 1

Python DQNAgent.train_model - 1 example found. This is the top-rated real-world Python example of dqn.DQNAgent.train_model extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

DQNAgent(30)

act(13)

load(11)

compile(8)

fit(5)

save(5)

train(5)

replay(5)

test(4)

save_weights(4)

remember(4)

get_action(4)

load_model(4)

actDeterministically(4)

epsilon(3)

save_model(3)

load_weights(3)

target_model(2)

observe(2)

start(2)

get_last_observations(2)

end(2)

train_one_episode(1)

train_model(1)

trainAgent(1)

train_only(1)

update_epoch(1)

update_replay_memory(1)

test_one_episode(1)

test_model(1)

update_target(1)

store_transition(1)

train_rnn(1)

testAgent(1)

update_target_model(1)

train_vae(1)

training(1)

restart_epoch(1)

store_experience(1)

load_state_dict(1)

__init__(1)

act_2(1)

append_sample(1)

backword(1)

fill_memory(1)

get_test_loss(1)

learn(1)

loss(1)

step(1)

parameters(1)

Example #1

Show file

File: robot_environment.py Project: IntelliRBot/gazebo

    def train_dqn(self):
        """Train a DQN agent on this environment for up to ``EPISODES`` episodes.

        Each episode resets the environment, then repeatedly asks the agent
        for an action, steps the environment, appends the transition to the
        agent's replay memory and calls ``agent.train_model()``.  When an
        episode finishes, ``agent.update_target_model()`` is called, the score
        curve is re-plotted to ``./cartpole_dqn.png`` and a summary line is
        printed.  Model weights are written to ``./cartpole_dqn.h5`` every
        20 episodes.

        Training stops early once the mean score of the most recent (up to)
        10 episodes exceeds 490.
        """
        # Sizes of the state vector and the discrete action set.
        state_size = 4
        action_size = 2

        agent = DQNAgent(state_size, action_size)

        scores, episodes = [], []

        for episode in range(1, EPISODES + 1):
            done = False
            score = 0

            self.reset()
            # NOTE(review): -1 is presumably a "no-op" action used only to
            # read the initial observation -- confirm against step().
            state, _, _, _ = self.step(-1)
            state = np.reshape(state, [1, state_size])

            rospy.loginfo("Episode %d: starting", episode)

            while not done:
                action = agent.get_action(state)
                next_state, reward, done, _ = self.step(action)

                # Log before reshaping so next_state[0] is still a scalar.
                rospy.loginfo("Episode %d: action: %d pitch: %f", episode,
                              action, next_state[0])

                next_state = np.reshape(next_state, [1, state_size])
                # An action that ends the episode is penalised with -100,
                # unless the episode ended because the score cap was reached.
                reward = reward if not done or score == 499 else -100

                # Save the sample <s, a, r, s'> to the replay memory and
                # train on every time step.
                agent.append_sample(state, action, reward, next_state, done)
                agent.train_model()
                score += reward
                state = next_state

                if done or score >= 500:
                    # Once per episode, sync the target model with the model.
                    agent.update_target_model()

                    # Undo the -100 penalty so the plotted score reflects
                    # play time only.
                    score = score if score == 500 else score + 100
                    scores.append(score)
                    episodes.append(episode)
                    pylab.plot(episodes, scores, "b")
                    pylab.savefig("./cartpole_dqn.png")
                    print(
                        "episode:",
                        episode,
                        " score:",
                        score,
                        " memory length:",
                        len(agent.memory),
                        " epsilon:",
                        agent.epsilon,
                    )
                    break

            # Periodically checkpoint the model weights.
            if episode % 20 == 0:
                agent.model.save_weights("./cartpole_dqn.h5")

            rospy.loginfo("Episode %d: completed", episode)

            # Early stopping.  This check used to sit after the ``break``
            # inside the inner loop, where it was unreachable; it must also
            # leave the *episode* loop, not just the step loop.
            if scores and np.mean(scores[-10:]) > 490:
                break