Esempio n. 1
0
            # One environment step: choose an action, apply it to the
            # environment, and record the resulting transition.
            step += 1
            # Gradient tracking is disabled while interacting with the
            # environment (assumes `t` is the torch module -- TODO confirm).
            with t.no_grad():
                # Keep the pre-step observation; `state` is overwritten below.
                old_state = state
                # agent model inference
                action = dqn.act_discrete_with_noise({"state": old_state})
                # `.item()` passes the action to the environment as a plain
                # scalar (presumably `action` is a one-element tensor).
                state, reward, terminal, _ = env.step(action.item())
                # Convert the new observation to a float32 tensor viewed as
                # (1, observe_dim).
                state = t.tensor(state, dtype=t.float32).view(1, observe_dim)
                total_reward += reward

                # Hand the (state, action, next_state, reward, terminal)
                # transition to the agent.
                dqn.store_transition({
                    "state": {
                        "state": old_state
                    },
                    "action": {
                        "action": action
                    },
                    "next_state": {
                        "state": state
                    },
                    "reward": reward,
                    # Also flagged terminal when the step counter reaches
                    # max_steps, not only when the environment reports it.
                    "terminal": terminal or step == max_steps
                })

        # Training is skipped until `episode` exceeds 100. After that, run one
        # dqn.update() per step counted in `step`, so a longer episode is
        # followed by proportionally more updates.
        if episode > 100:
            for _update in range(step):
                dqn.update()

        # Exponential moving average of the total reward: 90% of the previous
        # smoothed value plus 10% of the latest total.
        carried_over = smoothed_total_reward * 0.9
        latest_share = total_reward * 0.1
        smoothed_total_reward = carried_over + latest_share
        # One environment step: query the agent, convert its output into the
        # value passed to the environment, shape the reward, and store the
        # transition.
        # The current state is wrapped as a float32 tensor with a leading
        # dimension added by unsqueeze(0) before being passed to the agent.
        out = dqn.act_discrete_with_noise({"state": torch.tensor(state, dtype=torch.float32).unsqueeze(0)})
        # Rescale the agent output by num_actions and shift it by env.h
        # (presumably maps a discrete index onto the environment's action
        # range -- TODO confirm against the env definition).
        action = out.squeeze().detach().numpy()/num_actions - env.h
        # This environment's step() returns three values (no info item).
        new_state, reward, done = env.step(action)
        #print(action)
        #print(reward)
        # Collapse the returned reward into a single number by summing it.
        reward = np.sum(reward)
        #print(reward)
        # Keep only entries 0, 1, 2 and 4 of the returned state
        # (assumes new_state is a NumPy array -- TODO confirm).
        new_state = new_state[[0, 1, 2, 4]]
        #print(state)
        # Shaped reward: negative, scaled by norm_factor, quadratic in the
        # summed reward plus a small (1/1000) linear term.
        reward = -norm_factor*((reward) ** 2 + 1 / 1000 * reward)
        # Collected in `rew` (averaged and reset by the periodic report below).
        rew.append(reward)

        # Hand the transition to the agent; both states are wrapped the same
        # way as the input given to the agent above.
        dqn.store_transition({
            "state": {"state": torch.tensor(state, dtype=torch.float32).unsqueeze(0)},
            "action": {"action": out},
            "next_state": {"state": torch.tensor(new_state, dtype=torch.float32).unsqueeze(0)},
            "reward": float(reward),  # shaped reward, already scaled by norm_factor
            "terminal": done
        })
        # Advance to the next step using the reduced state.
        state = new_state

    # Every 50th outer iteration (j == 0 excluded): print the results of
    # test(10) and test_delta(10), report the mean of the rewards collected
    # since the previous report (raw and divided by norm_factor), then start
    # a fresh reward list.
    is_report_iteration = j != 0 and j % 50 == 0
    if is_report_iteration:
        print(test(10), test_delta(10))
        mean_reward = np.mean(rew)
        print("reward: ", mean_reward, mean_reward / norm_factor)
        rew = []

    # Training starts only once j exceeds 100; each such iteration runs
    # int(num_steps) agent updates.
    if j > 100:
        update_count = int(num_steps)
        for _update in range(update_count):
            dqn.update()

#dqn.save("dqn_model_1000")