Example #1
import gym
import numpy as np
import torch

# QNetwork and MLP are defined elsewhere in the project
def main():

    env = gym.make("LanderCustom-v0")

    qnetwork = QNetwork(state_size=8, action_size=4, seed=0)
    qnetwork.load_state_dict(torch.load('basic_lander.pth'))
    qnetwork.eval()

    human = MLP()
    human.load_state_dict(torch.load('expert_bc.pt'))
    human.eval()
    softmax = torch.nn.Softmax(dim=0)

    episodes = 30
    scores = []
    Q_threshold = 1e-2

    for episode in range(episodes):

        # schedule of horizontal disturbance forces across episodes
        if episode < 10:
            force_x = 0.0
        elif episode < 20:
            force_x = +500.0
        else:
            force_x = -500.0

        env.start_state(force_x, 0.0)
        state = env.reset()
        score = 0

        while True:

            with torch.no_grad():
                state = torch.from_numpy(state).float()
                Q_values = qnetwork(state).numpy()
                action_pred_dist = softmax(human(state)).numpy()

            # greedy action under the Q-network vs. an action sampled
            # from the behavior-cloned human policy
            action_star = np.argmax(Q_values)
            action = np.random.choice(np.arange(4), p=action_pred_dist)

            # regret of the executed action relative to the greedy action
            loss = Q_values[action_star] - Q_values[action]
            # if loss > Q_threshold:
            #     action = action_star

            # env.render()
            state, reward, done, _ = env.step(action)
            score += reward
            if done:
                print("episode: ", episode, "score: ", score)
                break

        scores.append(score)

    env.close()
    print("The average score is: ", np.mean(np.array(scores)))
Example #2
import torch
import torch.nn as nn

# MLP is the behavior-cloned human model defined elsewhere in the project
class R_MLP(nn.Module):

    def __init__(self):
        super(R_MLP, self).__init__()

        self.name = "models/r_model.pt"
        self.n_steps = 10

        # load the pretrained human model and freeze it in eval mode
        self.human = MLP()
        model_dict = torch.load("models/h_model.pt", map_location='cpu')
        self.human.load_state_dict(model_dict)
        self.human.eval()

        # robot network: 4-dim input -> 8 -> 8 -> 2-dim output
        self.rc_1 = nn.Linear(4, 8)
        self.rc_2 = nn.Linear(8, 8)
        self.rc_3 = nn.Linear(8, 2)
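The snippet stops before R_MLP's forward pass. A plausible forward() chaining rc_1 -> rc_2 -> rc_3; the tanh activations are an assumption, not shown in the source:

    def forward(self, x):
        # 4-dim input through the robot layers; activation choice is a guess
        h = torch.tanh(self.rc_1(x))
        h = torch.tanh(self.rc_2(h))
        return self.rc_3(h)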
Example #3
import torch
import torch.nn as nn

class I_MLP_MLP(nn.Module):

    def __init__(self):
        super(I_MLP_MLP, self).__init__()

        # load the current behavior-cloned human model and freeze it
        self.bc_human = MLP()
        model_dict = torch.load(self.bc_human.name, map_location='cpu')
        self.bc_human.load_state_dict(model_dict)
        self.bc_human.eval()

        self.name = "models/inf_robot.pt"

        # two parallel two-layer heads, each mapping a 4-dim input to a 2-dim output
        self.fc_1 = nn.Linear(4, 4)
        self.fc_2 = nn.Linear(4, 2)

        self.rc_1 = nn.Linear(4, 4)
        self.rc_2 = nn.Linear(4, 2)
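None of the snippets define MLP, the behavior-cloned human model they all load. In Example #1 it maps the 8-dim lander state to logits over the 4 discrete actions, and Example #3 reads a checkpoint path from its .name attribute, so here is a minimal sketch under those assumptions (the hidden width and the path value are guesses):

import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    """Behavior-cloned human model: 8-dim state -> 4 action logits (assumed)."""

    def __init__(self):
        super(MLP, self).__init__()
        self.name = "models/h_model.pt"  # checkpoint path attribute used in Example #3; value is a guess
        self.fc1 = nn.Linear(8, 64)
        self.fc2 = nn.Linear(64, 4)

    def forward(self, state):
        return self.fc2(F.relu(self.fc1(state)))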
Example #4
import pickle
import sys
import time

import gym
import numpy as np
import torch

# Joystick, QNetwork, and MLP are defined elsewhere in the project
def main():

    env = gym.make("LanderCustom-v0")
    fx_init = float(sys.argv[1])       # horizontal disturbance force on the lander
    Q_threshold = float(sys.argv[2])   # robot intervention threshold
    savename = 'test1.pkl'

    joystick = Joystick()
    qnetwork = QNetwork(state_size=8, action_size=4, seed=0)
    qnetwork.load_state_dict(torch.load('basic_lander.pth'))
    qnetwork.eval()

    human = MLP()
    human.load_state_dict(torch.load('mlp_model.pt'))
    human.eval()

    episodes = 10
    scores = []
    data = []
    env.start_state(fx_init, 0.0)

    for episode in range(episodes):

        state = env.reset()
        env.render()
        score = 0

        # wait for the start button before beginning the episode
        while True:

            action, start, stop = joystick.input()
            if start:
                break
        while True:

            # read the human's joystick action and log the (state, action) pair
            action, start, stop = joystick.input()
            data.append(list(state) + [action])

            with torch.no_grad():
                state = torch.from_numpy(state).float()
                Q_values = qnetwork(state).numpy()
                action_pred_dist = human(state).numpy()
            action_star = np.argmax(Q_values)
            action_pred = np.argmax(action_pred_dist)

            # action = action_pred

            # override the human when their action is more than
            # Q_threshold worse than the greedy action
            loss = Q_values[action_star] - Q_values[action]
            if loss > Q_threshold:
                action = action_star

            env.render()
            state, reward, done, _ = env.step(action)
            score += reward

            if done or stop:
                print(episode, score)
                # pickle.dump(data, open(savename, "wb" ))
                break
            time.sleep(0.025)

        scores.append(score)

    env.close()
    print(scores)
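Example #4 takes the disturbance force and the intervention threshold as positional command-line arguments; a hypothetical invocation (the script name is a guess):

python lander_teleop.py 500.0 0.01

The Joystick class is also not shown. Below is a sketch of a pygame-based wrapper matching the input() -> (action, start, stop) signature the loop expects; the axis and button mapping is entirely an assumption:

import pygame

class Joystick:
    """Hypothetical gamepad wrapper for LunarLander's 4 discrete actions."""

    def __init__(self):
        pygame.init()
        self.gamepad = pygame.joystick.Joystick(0)
        self.gamepad.init()

    def input(self):
        pygame.event.pump()
        x = self.gamepad.get_axis(0)        # left stick, horizontal (assumed)
        fire = self.gamepad.get_button(0)   # main engine button (assumed)
        start = self.gamepad.get_button(7)  # 'start' button (assumed)
        stop = self.gamepad.get_button(6)   # 'back' button (assumed)
        if fire:
            action = 2      # fire main engine
        elif x < -0.5:
            action = 1      # fire left orientation engine
        elif x > 0.5:
            action = 3      # fire right orientation engine
        else:
            action = 0      # do nothing
        return action, start, stop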