Example 1
from machin.frame.algorithms import DQN


def generate_config():
    # start from DQN's default config, then register the two model classes
    # (online Q network and target Q network) and their constructor arguments
    config = DQN.generate_config({})
    config["frame_config"]["models"] = ["QNet", "QNet"]
    config["frame_config"]["model_kwargs"] = [{
        "state_dim": 4,
        "action_num": 2
    }] * 2
    config["train_env_config"] = {}
    config["test_env_config"] = {}
    return config
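
The config above refers to its models only by the class name "QNet". For reference, a minimal network matching those model_kwargs (state_dim=4, action_num=2) looks like the QNet used in the later examples; the hidden layer width of 16 is illustrative, not required.

import torch as t
import torch.nn as nn


class QNet(nn.Module):
    def __init__(self, state_dim, action_num):
        super().__init__()
        self.fc1 = nn.Linear(state_dim, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, action_num)

    def forward(self, state):
        a = t.relu(self.fc1(state))
        a = t.relu(self.fc2(a))
        return self.fc3(a)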
Example 2
import torch as t
import torch.nn as nn
from machin.frame.algorithms import DQN


class QNet(nn.Module):
    def __init__(self, state_dim, action_num):
        super().__init__()
        self.fc1 = nn.Linear(state_dim, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, action_num)

    def forward(self, state):
        a = t.relu(self.fc1(state))
        a = t.relu(self.fc2(a))
        return self.fc3(a)


if __name__ == "__main__":
    q_net = QNet(observe_dim, action_num)
    q_net_t = QNet(observe_dim, action_num)

    dqn = DQN(q_net, q_net_t, t.optim.Adam, nn.MSELoss(reduction='sum'))

    episode, step, reward_fulfilled = 0, 0, 0
    smoothed_total_reward = 0

    while episode < max_episodes:
        episode += 1
        total_reward = 0
        terminal = False
        step = 0
        state = t.tensor(env.reset(), dtype=t.float32).view(1, observe_dim)

        while not terminal and step <= max_steps:
            step += 1
            with t.no_grad():
                old_state = state
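
The listing stops mid-loop. As a hedged sketch of how machin's DQN examples typically continue (act_discrete_with_noise, store_transition, and update are machin DQN methods; the dictionary keys and the warm-up threshold below are assumptions that mirror this pattern):

                # sketch: choose an action, advance the environment, and store
                # the transition for replay-based updates
                action = dqn.act_discrete_with_noise({"state": old_state})
                state, reward, terminal, _ = env.step(action.item())
                state = t.tensor(state, dtype=t.float32).view(1, observe_dim)
                total_reward += reward

                dqn.store_transition({
                    "state": {"state": old_state},
                    "action": {"action": action},
                    "next_state": {"state": state},
                    "reward": reward,
                    "terminal": terminal and step != max_steps,
                })

        # after a warm-up period, train on the collected transitions
        if episode > 20:
            for _ in range(step):
                dqn.update()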
Example 3
import torch as t
import torch.nn as nn
from machin.frame.algorithms import DQN


class QNet(nn.Module):
    def __init__(self, state_dim, action_num):
        super().__init__()
        self.fc1 = nn.Linear(state_dim, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, action_num)

    def forward(self, state):
        a = t.relu(self.fc1(state))
        a = t.relu(self.fc2(a))
        return self.fc3(a)


if __name__ == "__main__":
    q_net = QNet(observe_dim, action_num)
    q_net_t = QNet(observe_dim, action_num)

    dqn = DQN(q_net, q_net_t, t.optim.Adam, nn.MSELoss(reduction="sum"))

    episode, step, reward_fulfilled = 0, 0, 0
    smoothed_total_reward = 0

    while episode < max_episodes:
        episode += 1
        total_reward = 0
        terminal = False
        step = 0
        state = t.tensor(env.reset(), dtype=t.float32).view(1, observe_dim)
        tmp_observations = []

        while not terminal and step <= max_steps:
            step += 1
            with t.no_grad():
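
This variant differs from Example 2 by collecting transitions into tmp_observations instead of storing them one by one. A hedged sketch of the per-episode storage pattern (store_episode is assumed here as machin's batch counterpart to store_transition; the keys mirror the sketch under Example 2):

                # sketch: append each transition to the temporary buffer
                action = dqn.act_discrete_with_noise({"state": old_state})
                state, reward, terminal, _ = env.step(action.item())
                state = t.tensor(state, dtype=t.float32).view(1, observe_dim)
                total_reward += reward

                tmp_observations.append({
                    "state": {"state": old_state},
                    "action": {"action": action},
                    "next_state": {"state": state},
                    "reward": reward,
                    "terminal": terminal and step != max_steps,
                })

        # hand the whole episode to the framework, then update
        dqn.store_episode(tmp_observations)
        dqn.update()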
Example 4
# to mark the input/output devices manually;
# this will not work if you move your model to other devices
# after wrapping

# q_net = static_module_wrapper(q_net, "cpu", "cpu")
# q_net_t = static_module_wrapper(q_net_t, "cpu", "cpu")
# q_net = static_module_wrapper(q_net, device, device)
# q_net_t = static_module_wrapper(q_net_t, device, device)

# to mark the input/output devices automatically;
# this will not work if your model is located on multiple devices

q_net = dynamic_module_wrapper(q_net)
q_net_t = dynamic_module_wrapper(q_net_t)

dqn = DQN(q_net, q_net_t, t.optim.Adam, nn.MSELoss(reduction='sum'))


def fnTrain():
    episode, step, reward_fulfilled = 0, 0, 0
    smoothed_total_reward = 0
    iNumOfTrainSamples = env.fnNumIterations()
    afRewardArray = []
    fMaxRewardSum = -np.inf
    while episode < iNumOfTrainSamples:
        episode += 1
        total_reward = 0
        terminal = False
        step = 0
        state = t.tensor(env.reset(),
                         dtype=t.float32).view(1,


class QNet(nn.Module):
    def __init__(self, state_dim, hidden_dim, action_num):
        super(QNet, self).__init__()

        self.fc1 = nn.Linear(state_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, action_num)

    def forward(self, state):
        a = torch.relu(self.fc1(state))
        a = torch.relu(self.fc2(a))
        return self.fc3(a)

qnet = QNet(4, 20, num_actions)
qnet_t = QNet(4, 20, num_actions)

dqn = DQN(qnet, qnet_t,
          torch.optim.Adam,
          nn.MSELoss(reduction='sum'),
          discount=0.8,
          epsilon_decay=0.999,
          learning_rate=0.001,
          lr_scheduler=torch.optim.lr_scheduler.StepLR,
          lr_scheduler_kwargs=[{"step_size": 1000 * 128}])

num_eps = 5000
norm_factor = 10000000


def test_delta(n=10):
    rew = []
    for i in range(n):
        state = env.reset()
        done = False
        state = state[[0, 1, 2, 4]]
        while not done:
            action = state[3] - env.h
            new_state, reward, done = env.step(action)