Example #1
import torch as T

# MultiGoalEnv is assumed to be importable from the surrounding project.
def plot(actor):
    """Roll out the actor in MultiGoalEnv and save the visited positions."""
    paths = []
    env = MultiGoalEnv()
    n_games = 50
    max_episode_length = 30
    for i in range(n_games):
        observation = env.reset(init_state=[-3, 0])
        episode_length = 0
        done = False
        score = 0
        path = {'infos': {'pos': []}}
        while not done:
            env.render()
            # The actor is expected to return an (action, log_prob) pair of tensors.
            action, _ = actor.forward(T.Tensor([observation]).to(actor.device))
            action = action.cpu().detach().numpy()[0]
            observation_, reward, done, info = env.step(action)
            path['infos']['pos'].append(observation)

            if episode_length == max_episode_length:
                done = True
            episode_length += 1

            score += reward
            observation = observation_
        paths.append(path)

    # i is the index of the last episode, so the figure is saved as test_49.png.
    env.render_rollouts(paths, fout="test_%d.png" % i)
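
The plot() helper above only assumes an actor module whose forward() call returns an (action, log_prob) pair of tensors and which exposes a .device attribute. A minimal, hypothetical stand-in for that interface (not the original actor class) could look like this sketch:

import torch as T
import torch.nn as nn

class DummyActor(nn.Module):
    # Hypothetical stand-in for the actor expected by plot() above.
    def __init__(self, obs_dim=2, act_dim=2):
        super().__init__()
        self.fc = nn.Linear(obs_dim, act_dim)
        self.device = T.device('cpu')

    def forward(self, obs):
        action = T.tanh(self.fc(obs))        # squash actions to [-1, 1]
        log_prob = T.zeros(obs.shape[0], 1)  # placeholder log-probability
        return action, log_prob

# plot(DummyActor())  # would roll out 50 episodes and write test_49.png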
Example #2
import numpy as np

# MultiGoalEnv is assumed to be importable from the surrounding project.
def plot(agent):
    """Roll out the agent in MultiGoalEnv and save the visited positions."""
    paths = []
    score_history = []
    env = MultiGoalEnv()
    n_games = 50
    max_episode_length = 20
    for i in range(n_games):
        observation = env.reset(init_state=[0, 0])
        episode_length = 0
        done = False
        score = 0
        path = {'infos': {'pos': []}}
        while not done:
            env.render()
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            path['infos']['pos'].append(observation)

            if episode_length == max_episode_length:
                done = True
            episode_length += 1

            score += reward
            observation = observation_
        paths.append(path)

        score = score / 200  # normalise the episode return before logging
        score_history.append(score)
        avg_score = np.mean(score_history[-20:])  # running average over the last 20 episodes

    # i is the index of the last episode, so the figure is saved as test_49.png.
    env.render_rollouts(paths, fout="test_%d.png" % i)
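
This variant only relies on agent.choose_action(observation) returning a NumPy action that env.step() can consume. A minimal sketch of that interface, using a hypothetical random-policy agent rather than the original agent class, is:

import numpy as np

class RandomAgent:
    # Hypothetical agent: choose_action() must map a raw observation to a
    # NumPy action compatible with env.step().
    def __init__(self, act_dim=2, act_limit=1.0):
        self.act_dim = act_dim
        self.act_limit = act_limit

    def choose_action(self, observation):
        return np.random.uniform(-self.act_limit, self.act_limit, self.act_dim)

# plot(RandomAgent())  # rolls out 50 episodes with uniformly random actions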
Example #3
    def plot_paths(self, epoch):
        """Roll out the current policy in MultiGoalEnv and save the paths."""
        paths = []
        env = MultiGoalEnv()

        for episode in range(50):
            observation = env.reset()
            done = False
            step = 0
            path = {'infos': {'pos': []}}
            while not done and step < 30:
                # Sample in eval mode, then switch the network back to train mode.
                self.SVGD_Network.eval()
                actions = self.get_sample(observation, 1)
                self.SVGD_Network.train()

                observation, reward, done, _ = env.step(actions)
                path['infos']['pos'].append(observation)
                step += 1
            # Append each episode's path once, after the episode ends.
            paths.append(path)
        print("saving figure..., epoch=", epoch)

        env.render_rollouts(paths, fout="test_%d.png" % epoch)
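
plot_paths() switches the SVGD network to eval mode around sampling and expects self.get_sample(observation, n) to return actions drawn from the amortized sampler. The exact sampler signature is not shown here; a rough sketch of such a method, assuming the network maps (observation, noise) pairs to actions and that self.action_dim exists, is:

import torch as T

def get_sample(self, observation, n_sample):
    # Hypothetical sketch: draw n_sample actions for a single observation
    # from the SVGD sampler network and return them as a NumPy array.
    obs = T.as_tensor(observation, dtype=T.float32).unsqueeze(0)
    obs = obs.repeat(n_sample, 1)               # one observation copy per particle
    noise = T.randn(n_sample, self.action_dim)  # base noise particles
    with T.no_grad():
        actions = self.SVGD_Network(obs, noise)  # amortized sampling (assumed signature)
    return actions.squeeze(0).numpy() if n_sample == 1 else actions.numpy()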
Example #4

    best_score = env.reward_range[0]
    score_history = []
    load_checkpoint = False

    max_episode_length = 30

    if load_checkpoint:
        agent.load_models()
        env.render(mode='human')

    for i in range(n_games):

        observation = env.reset(init_state=[0, 0])
        episode_length = 0

        done = False
        score = 0
        while not done:
            env.render()
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)

            if episode_length == max_episode_length:
                done = True
            episode_length += 1

            score += reward
            observation = observation_

    # Trailing keyword arguments of an agent constructor call whose opening
    # is not shown in this snippet:
                  replay_size=int(1e6),
                  pi_lr=1e-3,
                  q_lr=1e-3,
                  batch_size=100,
                  n_particles=16,
                  gamma=0.99,
                  polyak=0.995)

    epochs = 100
    update_after = 0
    max_ep_len = 30
    steps_per_epoch = 400
    # Prepare for interaction with environment
    total_steps = steps_per_epoch * epochs

    o, ep_ret, ep_len = env.reset(), 0, 0

    # Main loop: collect experience in env and update/log each epoch
    n_particles = 16
    epsilon = 0.8
    for t in range(total_steps):

        if epsilon > 0.2:
            epsilon -= 0.00001

        #a = agent.get_sample(o)
        if np.random.uniform(0, 1) > epsilon:
            a = agent.get_sample(o, n_sample=n_particles)
            # ind = np.random.choice(np.array([i for i in range(0, n_particles)]))
            # a = a[ind]
            Q_values = agent.Q_Network(T.tensor(o).float().unsqueeze(0).to(