            state = next_state
            episode_reward += reward

            if done:
                args_k = k_by_reward(episode_reward)
                state = env.reset()
                all_rewards.append(episode_reward)
                episode_reward = 0

            if len(replay_buffer) > batch_size:
                loss = improved_td_loss(batch_size, replay_buffer,
                                        current_model, target_model, gamma,
                                        args_k, optimizer)
                losses.append(loss.item())

            if frame_idx % 1000 == 0:
                update_target(current_model, target_model)
                print("Frame %d of 1000000, k_end %f, reward %f" %
                      (frame_idx, k_end * 2., all_rewards[-1]))

        losses_all.append(losses)
        rewards_all.append(all_rewards)

    str = "data/analysis/CartPolerl1_kend%f" % (k_end * 2.)

    mean_losses, var_losses = tl.StatShrink2D(losses_all)
    mean_rewards, var_rewards = tl.StatShrink2D(rewards_all)
tl.save2D4list(mean_losses, var_losses, mean_rewards, var_rewards, save_dir)
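
# `update_target` is not shown in these snippets. In standard DQN it is a hard
# sync of the online network's weights into the target network; a minimal
# sketch under that assumption (not necessarily the author's exact helper):
def update_target(current_model, target_model):
    # Hard update: overwrite the target parameters with the online ones.
    target_model.load_state_dict(current_model.state_dict())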
        action = current_model.act(state)
        next_state, reward, done, _ = env.step(action)
        replay_buffer.push(state, action, reward, next_state, done)

        state = next_state
        episode_reward += reward

        if done:
            state = env.reset()
            all_rewards.append(episode_reward)
            episode_reward = 0

        if len(replay_buffer) > batch_size:
            loss = compute_td_loss1(batch_size, replay_buffer, current_model,
                                    target_model, gamma, optimizer)
            losses.append(loss.item())

        if frame_idx % 1000 == 0:
            update_target(current_model, target_model)
            print("Frame %d of 1000000, range %d, reward %f" %
                  (frame_idx, 1, all_rewards[-1]))

    losses_all.append(losses)
    rewards_all.append(all_rewards)

mean_losses, var_losses = tl.StatShrink2D(losses_all)
mean_rewards, var_rewards = tl.StatShrink2D(rewards_all)
tl.save2D4list(mean_losses, var_losses, mean_rewards, var_rewards,
               "data/noisydqn_CartPole_five1.npz")
    replay_buffer = tl.BaseReplayBuffer(capacity)
    update_target(current_model, target_model)
    for frame_idx in range(1, num_frames + 1):
        args_k = 0.  # reset every frame; k_by_reward() sets it only when an episode ends
        action = current_model.act(state)
        next_state, reward, done, _ = env.step(action)
        replay_buffer.push(state, action, reward, next_state, done)
        state = next_state
        episode_reward += reward

        if done:
            args_k = k_by_reward(episode_reward)
            state = env.reset()
            all_rewards.append(episode_reward)
            episode_reward = 0

        if len(replay_buffer) > replay_initial:
            loss = improved_td_loss(batch_size, replay_buffer, current_model,
                                    target_model, gamma, args_k, optimizer)
            losses.append(loss.item())

        if frame_idx % updatefrc == 0:
            update_target(current_model, target_model)

        if frame_idx % 2000 == 0:
            print("Frame %d of 1000000, k_end %d, reward %f" %
                  (frame_idx, k_end, all_rewards[-1]))

    save_dir = "data/analysis/rl1_kend%d" % (k_end * 2)
    tl.save2D4list(losses, [], all_rewards, [], save_dir)
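
# `tl.BaseReplayBuffer` comes from the author's toolkit module `tl`. Judging by
# its use here (push, len, and an implied sample), it is a plain FIFO replay
# buffer; a minimal sketch under that assumption:
import random
from collections import deque

import numpy as np

class BaseReplayBuffer:
    def __init__(self, capacity):
        # FIFO storage: once full, the oldest transitions are discarded.
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform random minibatch, returned column-wise for tensor conversion.
        batch = random.sample(self.buffer, batch_size)
        state, action, reward, next_state, done = zip(*batch)
        return (np.stack(state), np.array(action), np.array(reward),
                np.stack(next_state), np.array(done, dtype=np.float32))

    def __len__(self):
        return len(self.buffer)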
Example #4
        action = current_model.act(state)
        next_state, reward, done, _ = env.step(action)
        replay_buffer.push(state, action, reward, next_state, done)
        state = next_state
        episode_reward += reward

        if done:
            args_k = k_by_reward(episode_reward)
            state = env.reset()
            all_rewards.append(episode_reward)
            episode_reward = 0

        if len(replay_buffer) > replay_initial:
            loss = improved_td_loss(batch_size, replay_buffer, current_model,
                                    target_model, gamma, args_k, optimizer)
            losses.append(loss.item())

        if frame_idx % updatefrc == 0:
            update_target(current_model, target_model)

        if frame_idx % 2000 == 0:
            print("Frame %d of 1000000, k_end %d, reward %f" %
                  (frame_idx, k_end * 2, all_rewards[-1]))

    save_dir = "data/analysis/rl05_kend%d" % (k_end * 2)
    tl.save2D4list(losses, [], all_rewards, [], save_dir)

mean_losses, var_losses = tl.StatShrink2D(losses_all)
mean_rewards, var_rewards = tl.StatShrink2D(rewards_all)
#tl.save2D4list2(mean_losses, var_losses, mean_rewards, var_rewards, weight_sigmas, bias_sigmas, frame_list, save_dir)
tl.save2D4list(mean_losses, var_losses, mean_rewards, var_rewards, save_dir)
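
# `tl.StatShrink2D` reduces a list of per-run curves to a mean curve and a
# variance curve. A plausible sketch, assuming it truncates every run to the
# shortest length before stacking (the real helper in `tl` may differ):
import numpy as np

def StatShrink2D(curves):
    # Clip all runs to the shortest one so they stack into a 2-D array,
    # then take the elementwise mean and variance across runs.
    min_len = min(len(c) for c in curves)
    stacked = np.asarray([c[:min_len] for c in curves], dtype=np.float64)
    return stacked.mean(axis=0).tolist(), stacked.var(axis=0).tolist()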