Example #1
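# Constructor of SAC_GMM_Agent (shown in full in Example #8): keep a reference to
# the provided GMM and train on a copy so the initial model stays untouched.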
def __init__(self, model, window_size=32, *args, **kwargs):
    self.initial_model = model  # Initial model provided
    self.model = GMM()
    self.model.copy_model(self.initial_model)  # Model used for training
    self.window_size = window_size
    self.burn_in_steps = 1000
    super(SAC_GMM_Agent, self).__init__(*args, **kwargs)
Example #2
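# Evaluate a list of saved GMM models in the sawyer peg environment and log
# accuracy, mean return, and mean episode length for each.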
def main(cfg):
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False
    
    env = custom_sawyer_peg_env(cfg.env)
    for model_name in cfg.model_names:
        print(model_name)
        model = GMM(add_cwd(model_name))
        accuracy, mean_return, mean_length = model.evaluate(env=env, **cfg.test)
        logger = logging.getLogger(__name__)
        logger.info("Accuracy: %.2f, Mean return: %.2f, Mean length: %.2f",
                    accuracy, mean_return, mean_length)
Example #3
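# Repeatedly fit GMMs on the demonstrations via the MATLAB engine, evaluate each
# new candidate in the environment, and save the model with the best mean return.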
def main(cfg):
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False
    logger = logging.getLogger('env.sawyer_peg_env')
    logger.propagate = False
    logger = logging.getLogger('pybulletX._wrapper')
    logger.propagate = False
    # Hyperparameters
    model_type = "pose"  # "pose" or "force"
    demonstration_dir = add_cwd("demonstrations_txt")
    K = 3
    budget = 20

    # Start the MATLAB engine
    log_likelihood = []
    best_ret = 0
    if not drlfads.USE_MATLAB:
        raise NotImplementedError('This function requires MATLAB')

    eng = matlab.engine.start_matlab()
    eng.addpath(add_cwd(str(Path(__file__).parents[0])))
    env = custom_sawyer_peg_env(cfg.env)
    for _ in range(budget):
        name = "gmm_peg_%s_%d" % (type, K)
        bll = eng.train_model(demonstration_dir, name, type, K, 1)
        print("model trained, final log likelihood:", bll)

        # Only evaluate models with a previously unseen log likelihood
        if bll not in log_likelihood:
            # Evaluate model in actual environment
            log_likelihood.append(bll)
            model = GMM(name + ".mat")
            accuracy, mean_return, mean_length = model.evaluate(env=env,
                                                                **cfg.test)
            print("Accuracy:", accuracy, "Mean return:", mean_return,
                  "Mean length:", mean_length)
            if mean_return > best_ret:
                print("Best model so far!")
                best_ret = mean_return
                model.save_model(name + ".npy")

    eng.quit()
Example #4
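# Black-box objective over GMM parameter changes: apply the change x to a copy of
# the initial model, run one evaluation episode, and return the negative mean
# return so that a minimizer maximizes reward.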
def objective(self, x):
    model = GMM()
    model.copy_model(self.initial_model)
    model.update_gaussians(np.asarray(x))
    accuracy, mean_return, mean_length = model.evaluate(self.env,
                                                        max_steps=600,
                                                        num_episodes=1)
    print("Accuracy:", accuracy, "mean_return:", mean_return)
    return -mean_return
Example #5
def main(cfg):
    # Do not show tacto renderer output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False

    env = custom_sawyer_peg_env(cfg.env)
    gmm_model = GMM(add_cwd(cfg.gmm_model))
    agent = SAC_GMM_Agent(env=env, model=gmm_model, **cfg.agent)
    agent.load(add_cwd(cfg.test.model_name))
    stats = agent.evaluate(**cfg.test.run)
    logger = logging.getLogger(__name__)
    logger.info(stats)
    agent.env.close()
Example #6
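# Train a SAC_GMM agent for each random seed, then evaluate it in a freshly
# created environment.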
def main(cfg):
    # Do not show tacto renderer output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False

    for i in range(cfg.train.num_random_seeds):
        # Training
        env = custom_sawyer_peg_env(cfg.env)
        gmm_model = GMM(str(Path(cfg.gmm_model).absolute()))
        agent = SAC_GMM_Agent(env=env, model=gmm_model, **cfg.agent)
        save_filename = get_save_filename("sac_gmm", cfg, i)
        agent.train(**cfg.train.run, save_filename=save_filename)
        agent.env.close()

        # Testing
        agent.env = custom_sawyer_peg_env(cfg.env)
        agent.evaluate(**cfg.test.run)
        agent.env.close()
Example #7
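# Refine a saved GMM with GMMOptimizer and store the best parameters found;
# -res.fun is the corresponding average reward.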
def main():
    # Environment hyperparameters
    env_params = {
        "show_gui": False,
        "with_force": False,
        "with_joint": False,
        "relative": True,
        "with_noise": False,
        "dt": 0.05
    }
    env = custom_sawyer_peg_env(**env_params)

    # Evaluation parameters
    model_name = "models/GMM_models/gmm_peg_v2_pose_9.npy"
    model = GMM(model_name)

    optimizer = GMMOptimizer(env, model)
    res = optimizer.optimize()
    print(res.x)
    model.update_gaussians(np.asarray(res.x))
    new_model_name = "models/optimizer/test.npy"
    model.save_model(new_model_name)
    print("Best model - Average reward:", -res.fun)
    print("Model saved as:", new_model_name)
Example #8
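# Full SAC_GMM_Agent: the SAC policy outputs bounded changes to the GMM's priors
# and means, and each change is rolled out with the perturbed GMM for up to
# window_size environment steps.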
class SAC_GMM_Agent(SAC_Agent):
    def __init__(self, model, window_size=32, *args, **kwargs):
        self.initial_model = model  # Initial model provided
        self.model = GMM()
        self.model.copy_model(self.initial_model)  # Model used for training
        self.window_size = window_size
        self.burn_in_steps = 1000
        super(SAC_GMM_Agent, self).__init__(*args, **kwargs)

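    # The action is a flat vector with one entry per prior followed by one entry
    # per mean component, each bounded to [-1, 1] and rescaled in update_gaussians().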
    def get_action_space(self):
        if not hasattr(self, 'action_space'):
            priors_high = np.ones(self.model.priors.size)
            mu_high = np.ones(self.model.mu.size)
            action_high = np.concatenate((priors_high, mu_high), axis=-1)
            action_low = -action_high
            self.action_space = gym.spaces.Box(action_low, action_high)
        return self.action_space

    def update_gaussians(self, gmm_change):
        # change of priors range: [-0.1, 0.1]
        priors = gmm_change[:self.model.priors.size]
        priors = priors.reshape(self.model.priors.shape) * 0.1
        # change of mus range: [-0.01, 0.01]
        mu = gmm_change[self.model.priors.size:]
        mu = mu.reshape(self.model.mu.shape) * 0.01
        change_dict = {"mu": mu, "prior": priors}
        self.model.update_gaussians(change_dict)

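    # Evaluation: for each window, reset the working model to the initial GMM, let
    # the deterministic policy propose a change, then roll the perturbed GMM out
    # for up to window_size steps using its predicted velocities.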
    def evaluate(self, num_episodes=5, render=False):
        successful_episodes, episodes_returns, episodes_lengths = 0, [], []
        for episode in range(1, num_episodes + 1):
            observation = self.env.reset()
            episode_return, episode_length, left_steps = 0, 0, self.env.max_episode_steps
            while left_steps > 0:
                self.model.copy_model(self.initial_model)
                gmm_change = self.get_action_from_observation(
                    observation, deterministic=True)
                self.update_gaussians(gmm_change)
                model_reward = 0
                for step in range(self.window_size):
                    vel = self.model.predict_velocity_from_observation(
                        observation)
                    observation, reward, done, info = self.env.step(vel)
                    model_reward += reward
                    episode_length += 1
                    left_steps -= 1
                    if render:
                        self.env.render()
                    if done or left_steps <= 0:
                        break
                episode_return += model_reward
                if done:
                    break
                if render:
                    self.env.render()
            if ("success" in info) and info['success']:
                succesful_episodes += 1
            episodes_returns.append(episode_return)
            episodes_lengths.append(episode_length)
        accuracy = successful_episodes / num_episodes
        return accuracy, np.mean(episodes_returns), np.mean(episodes_lengths)

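    # Training mirrors evaluate(): the GMM change is held at zero for the first
    # burn_in_steps SAC training steps, and each SAC transition spans a whole
    # window of environment steps with the rewards summed into model_reward.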
    def train_episode(self, episode, exploration_episodes, log, render):
        sac_steps = 0
        episode_return, episode_length, left_steps = 0, 0, self.env.max_episode_steps
        ep_critic_loss, ep_actor_loss, ep_alpha_loss = 0, 0, 0
        observation = self.env.reset()
        while left_steps > 0:
            self.model.copy_model(self.initial_model)
            if self.training_step < self.burn_in_steps:
                gmm_change = np.zeros(self.action_space.shape)
            else:
                gmm_change = self.get_action_from_observation(
                    observation, deterministic=False)
            self.update_gaussians(gmm_change)
            model_reward = 0
            curr_observation = observation
            for step in range(self.window_size):
                vel = self.model.predict_velocity_from_observation(
                    curr_observation)
                curr_observation, reward, done, info = self.env.step(vel)
                model_reward += reward
                episode_length += 1
                left_steps -= 1
                if render:
                    self.env.render()
                if done or left_steps <= 0:
                    break
            critic_loss, actor_loss, alpha_loss = self.update(
                observation, gmm_change, curr_observation, model_reward, done,
                log)
            observation = curr_observation
            episode_return += model_reward
            ep_critic_loss += critic_loss
            ep_actor_loss += actor_loss
            ep_alpha_loss += alpha_loss
            self.training_step += 1  # SAC_Steps in total
            sac_steps += 1  # SAC_Steps in this episode

            if render:
                self.env.render()
            if done:
                break

        if log:
            self.log_scalar('Train/Episode/critic_loss',
                            ep_critic_loss / sac_steps, episode)
            self.log_scalar('Train/Episode/actor_loss',
                            ep_actor_loss / sac_steps, episode)
            self.log_scalar('Train/Episode/alpha_loss',
                            ep_alpha_loss / sac_steps, episode)
            self.log_episode_information(episode_return, episode_length,
                                         episode, "Train")

        return episode_return, episode_length