Example #1
            state_ = rgb2dataset(state_)

            model.memory(state, action, reward, done)
            accum_reward += reward
            model.step += 1
            state = state_

            # Transition window: keep only the four most recent states
            transition.append(state)
            if len(transition) > 4:
                transition.pop(0)

            if model.step > model.train_start_step and model.step % model.train_step_interval == 0:
                model.train()
                # Periodically sync the target network with the online network
                if model.step % model.target_update_interval == 0:
                    model.update_target()

            if is_render:
                env.render()

            if done:
                writer.add_scalar('reward/accum', accum_reward, model.step)
                writer.add_scalar('data/epsilon', model.epsilon, model.step)
                writer.add_scalar('data/x_pos', info['x_pos'], model.step)
                print(
                    "Episode : %5d\t\tSteps : %10d\t\tReward : %7d\t\tX_step : %4d\t\tEpsilon : %.3f"
                    % (model.episode, model.step, accum_reward, info['x_pos'],
                       model.epsilon))

                if save_model and model.episode % 100 == 0:
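
The rgb2dataset helper called above is not part of this excerpt; it presumably converts the raw RGB frame into the preprocessed array the network consumes. A minimal sketch of that kind of preprocessing (grayscale conversion plus downscaling with OpenCV; the name, output size, and scaling here are assumptions, not the repository's actual code):

import cv2
import numpy as np

def rgb2dataset(frame, out_size=(84, 84)):
    # Assumed preprocessing: grayscale, resize, and scale pixel values to [0, 1].
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    small = cv2.resize(gray, out_size, interpolation=cv2.INTER_AREA)
    return small.astype(np.float32) / 255.0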
Example #2
gamma = 0.99
lr = 3e-4
buffer_size = 50000
learning_starts = 300
grad_clip = 10
plot_freq = 1000

losses = []
all_rewards = []
episode_reward = 0
saved_mean_reward = None

# Create the DQN agent (dueling architecture)
dqn = DQN(observation_size, num_actions, device=device, lr=lr, dueling=True, gamma=gamma)
# Sync the target network with the online network's weights
dqn.update_target()
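
update_target above is assumed to perform a hard update, i.e. to copy the online network's weights into the target network. A standalone sketch of that idea (hypothetical helper, not the DQN class's actual method):

import torch.nn as nn

def hard_update(target_net: nn.Module, online_net: nn.Module) -> None:
    # Overwrite the target network's parameters with the online network's current weights.
    target_net.load_state_dict(online_net.state_dict())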

replay_buffer = ReplayBuffer(buffer_size) 
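
The ReplayBuffer implementation is not shown in this excerpt. The sketch below illustrates the fixed-capacity buffer interface such training loops typically rely on; the method names (add, sample) are assumptions rather than the actual class's API:

import random
from collections import deque

class ReplayBuffer:
    # Fixed-capacity experience buffer (assumed interface).
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)  # oldest transitions are dropped automatically

    def add(self, state, action, reward, next_state, done):
        # Store one transition tuple.
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Draw a random minibatch for one training step.
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)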

target_network_update_freq = 200
train_freq = 1
checkpoint_freq = 3000
num_episodes = 0
model_file = os.path.join(os.getcwd(),"turtlebot_model_test")

state = env.reset()
ep_no = 0  # episode number counter
teleop = False
# teleop = True

if not teleop:  # RL learning happens, no teleop mode