Python ACModel.eval 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: model

클래스/타입: ACModel

메소드/함수: eval

hotexamples.com에서의 예제들: 5

Python ACModel.eval - 5개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 model.ACModel.eval에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

ACModel(24)

to(10)

load_state_dict(9)

state_dict(6)

eval(5)

build(1)

compile(1)

cpu(1)

cuda(1)

load_weights(1)

named_parameters(1)

predict(1)

record(1)

save_weights(1)

total_grad(1)

예제 #1

파일 보기

class Agent:
    """An agent.

    It is able:
    - to choose an action given an observation,
    - to analyze the feedback (i.e. reward and done state) of its action."""
    def __init__(self,
                 obs_space,
                 action_space,
                 model_dir,
                 device=None,
                 argmax=False,
                 num_envs=1,
                 use_rim=False):
        obs_space, self.preprocess_obss = utils.get_obss_preprocessor(
            obs_space)
        self.acmodel = ACModel(obs_space, action_space, use_rim=use_rim)
        self.device = device
        self.argmax = argmax
        self.num_envs = num_envs

        if self.acmodel.recurrent:
            self.memories = torch.zeros(self.num_envs,
                                        self.acmodel.memory_size).to(device)

        self.acmodel.load_state_dict(utils.get_model_state(model_dir))
        self.acmodel.to(self.device)
        self.acmodel.eval()
        if hasattr(self.preprocess_obss, "vocab"):
            self.preprocess_obss.vocab.load_vocab(utils.get_vocab(model_dir))

    def get_actions(self, obss):
        preprocessed_obss = self.preprocess_obss(obss, device=self.device)

        with torch.no_grad():
            if self.acmodel.recurrent:
                dist, _, self.memories = self.acmodel(preprocessed_obss,
                                                      self.memories)
            else:
                dist, _ = self.acmodel(preprocessed_obss)

        if self.argmax:
            actions = dist.probs.max(1, keepdim=True)[1]
        else:
            actions = dist.sample()

        return actions.cpu().numpy()

    def get_action(self, obs):
        return self.get_actions([obs])[0]

    def analyze_feedbacks(self, rewards, dones):
        if self.acmodel.recurrent:
            masks = 1 - torch.tensor(dones, dtype=torch.float).to(
                self.device).unsqueeze(1)
            self.memories *= masks

    def analyze_feedback(self, reward, done):
        return self.analyze_feedbacks([reward], [done])

예제 #2

파일 보기

파일: agent.py 프로젝트: shyamalschandra/automatic-curriculum

class Agent:
    """An agent.

    It is able:
    - to choose an action given an observation,
    - to analyze the feedback (i.e. reward and done state) of its action."""

    def __init__(self, obs_space, action_space, model_dir, device=None, argmax=False, num_envs=1):
        obs_space, self.preprocess_obss = utils.get_obss_preprocessor(obs_space)
        self.acmodel = ACModel(obs_space, action_space)
        self.device = device
        self.argmax = argmax
        self.num_envs = num_envs

        self.acmodel.load_state_dict(utils.get_model_state(model_dir))
        self.acmodel.to(self.device)
        self.acmodel.eval()

    def get_actions(self, obss):
        preprocessed_obss = self.preprocess_obss(obss, device=self.device)

        with torch.no_grad():
            dist, _ = self.acmodel(preprocessed_obss)

        if self.argmax:
            actions = dist.probs.max(1, keepdim=True)[1]
        else:
            actions = dist.sample()

        return actions.cpu().numpy()

    def get_action(self, obs):
        return self.get_actions([obs])[0]

    def analyze_feedbacks(self, rewards, dones):
        pass

    def analyze_feedback(self, reward, done):
        return self.analyze_feedbacks([reward], [done])

예제 #3

파일 보기

class Agent:
    def __init__(self,
                 env,
                 model_dir,
                 model_type='PPO2',
                 logger=None,
                 argmax=False,
                 use_memory=False,
                 use_text=False,
                 num_cpu=1,
                 frames_per_proc=None,
                 discount=0.99,
                 lr=0.001,
                 gae_lambda=0.95,
                 entropy_coef=0.01,
                 value_loss_coef=0.5,
                 max_grad_norm=0.5,
                 recurrence=1,
                 optim_eps=1e-8,
                 optim_alpha=None,
                 clip_eps=0.2,
                 epochs=4,
                 batch_size=256):
        """
        Initialize the Agent object.

        This primarily includes storing of the configuration parameters, but there is some other logic for correctly
        initializing the agent.

        :param env: the environment for training
        :param model_dir: the save directory (appended with the goal_id in initialization)
        :param model_type: the type of model {'PPO2', 'A2C'}
        :param logger: existing text logger
        :param argmax: if we use determinsitic or probabilistic action selection
        :param use_memory: if we are using an LSTM
        :param use_text: if we are using NLP to parse the goal
        :param num_cpu: the number of parallel instances for training
        :param frames_per_proc: max time_steps per process (versus constant)
        :param discount: the discount factor (gamma)
        :param lr: the learning rate
        :param gae_lambda: the generalized advantage estimator lambda parameter (training smoothing parameter)
        :param entropy_coef: relative weight for entropy loss
        :param value_loss_coef: relative weight for value function loss
        :param max_grad_norm: max scaling factor for the gradient
        :param recurrence: number of recurrent steps
        :param optim_eps: minimum value to prevent numerical instability
        :param optim_alpha: RMSprop decay parameter (A2C only)
        :param clip_eps: clipping parameter for the advantage and value function (PPO2 only)
        :param epochs: number of epochs in the parameter update (PPO2 only)
        :param batch_size: number of samples for the parameter update (PPO2 only)
        """
        if hasattr(
                env, 'goal'
        ) and env.goal:  # if the environment has a goal, set the model_dir to the goal folder
            self.model_dir = model_dir + env.goal.goalId + '/'
        else:  # otherwise just use the model_dir as is
            self.model_dir = model_dir

        # store all of the input parameters
        self.model_type = model_type
        self.num_cpu = num_cpu
        self.frames_per_proc = frames_per_proc
        self.discount = discount
        self.lr = lr
        self.gae_lambda = gae_lambda
        self.entropy_coef = entropy_coef
        self.value_loss_coef = value_loss_coef
        self.max_grad_norm = max_grad_norm
        self.recurrence = recurrence
        self.optim_eps = optim_eps
        self.optim_alpha = optim_alpha
        self.clip_eps = clip_eps
        self.epochs = epochs
        self.batch_size = batch_size

        # use the existing logger and create two new ones
        self.txt_logger = logger
        self.csv_file, self.csv_logger = utils.get_csv_logger(self.model_dir)
        self.tb_writer = tensorboardX.SummaryWriter(self.model_dir)

        self.set_env(
            env
        )  # set the environment to with some additional checks and init of training_envs

        self.algo = None  # we don't initialize the algorithm until we call init_training_algo()

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.txt_logger.info(f"Device: {device}\n")

        try:  # if we have a saved model, load it
            self.status = utils.get_status(self.model_dir)
        except OSError:  # otherwise initialize the status
            print('error loading saved model.  initializing empty model...')
            self.status = {"num_frames": 0, "update": 0}
        if self.txt_logger: self.txt_logger.info("Training status loaded\n")

        if "vocab" in self.status:
            preprocess_obss.vocab.load_vocab(self.status["vocab"])
        if self.txt_logger:
            self.txt_logger.info("Observations preprocessor loaded")

        # get the obs_space and the observation pre-processor
        # (for manipulating gym observations into a torch-friendly format)
        obs_space, self.preprocess_obss = utils.get_obss_preprocessor(
            self.env.observation_space)
        self.acmodel = ACModel(obs_space,
                               self.env.action_space,
                               use_memory=use_memory,
                               use_text=use_text)
        self.device = device  # store the device {'cpu', 'cuda:N'}
        self.argmax = argmax  # if we are using greedy action selection
        # or are we using probabilistic action selection

        if self.acmodel.recurrent:  # initialize the memories
            self.memories = torch.zeros(num_cpu,
                                        self.acmodel.memory_size,
                                        device=self.device)

        if "model_state" in self.status:  # if we have a saved model ('model_state') in the status
            # load that into the initialized model
            self.acmodel.load_state_dict(self.status["model_state"])
        self.acmodel.to(
            device)  # make sure the model is located on the correct device
        self.txt_logger.info("Model loaded\n")
        self.txt_logger.info("{}\n".format(self.acmodel))

        # some redundant code.  uncomment if there are issues and delete after enough testing
        #if 'model_state' in self.status:
        #    self.acmodel.load_state_dict(self.status['model_state'])
        #self.acmodel.to(self.device)
        self.acmodel.eval()
        if hasattr(self.preprocess_obss, "vocab"):
            self.preprocess_obss.vocab.load_vocab(utils.get_vocab(model_dir))

    def init_training_algo(self, num_envs=None):
        """
        Initialize the training algorithm.

        This primarily calls the object creation functions for the A2C or PPO2 and the optimizer, but this also spawns
        a number of parallel environments, based on the self.num_cpu or num_envs input (if provided).

        Note, the spawning of parallel environments is VERY slow due to deepcopying the termination sets.  I tried some
        work arounds, but nothing worked properly, so we are stuck with it for now.

        :param num_envs: an override for the default number of environments to spawn (in self.num_cpu)
        """
        if not num_envs:
            num_envs = self.num_cpu

        if self.model_type == "A2C":
            # check to make sure that the A2C parameters are set
            assert self.optim_alpha
            self.training_envs = [deepcopy(self.env) for i in range(num_envs)
                                  ]  # spawn parallel environments

            if self.acmodel.recurrent:
                self.memories = torch.zeros(num_envs,
                                            self.acmodel.memory_size,
                                            device=self.device)

            self.algo = torch_ac.A2CAlgo(
                self.training_envs, self.acmodel, self.device,
                self.frames_per_proc, self.discount, self.lr, self.gae_lambda,
                self.entropy_coef, self.value_loss_coef, self.max_grad_norm,
                self.recurrence, self.optim_alpha, self.optim_eps,
                self.preprocess_obss)
        elif self.model_type == "PPO2":
            # check to see if the PPO2 parameters are set
            assert self.clip_eps and self.epochs and self.batch_size
            self.training_envs = [deepcopy(self.env) for i in range(num_envs)
                                  ]  # spawn parallel environments

            if self.acmodel.recurrent:
                self.memories = torch.zeros(num_envs,
                                            self.acmodel.memory_size,
                                            device=self.device)

            self.algo = torch_ac.PPOAlgo(
                self.training_envs, self.acmodel, self.device,
                self.frames_per_proc, self.discount, self.lr, self.gae_lambda,
                self.entropy_coef, self.value_loss_coef, self.max_grad_norm,
                self.recurrence, self.optim_eps, self.clip_eps, self.epochs,
                self.batch_size, self.preprocess_obss)
        else:
            raise ValueError("Incorrect algorithm name: {}".format(algo_type))

        # load the optimizer state, if it exists
        if "optimizer_state" in self.status:
            self.algo.optimizer.load_state_dict(self.status["optimizer_state"])
        self.txt_logger.info("Optimizer loaded\n")

    def learn(self,
              total_timesteps,
              log_interval=1,
              save_interval=10,
              save_env_info=False,
              save_loc=None):
        """
        The primary training loop.

        :param total_timesteps: the total number of timesteps
        :param log_interval: the period between logging/printing updates
        :param save_interval: the number of updates between model saving
        :param save_env_info: if we save the environment info (termination set) VERY SLOW
        :return: True, if training is successful
        """
        self.init_training_algo(
        )  # initialize the training algo/environment list/optimizer

        if save_loc:
            print(
                'ignoring save_loc override.  if this is not intended, fix me')

        # initialize parameters
        self.num_frames = self.status["num_frames"]
        self.update = self.status["update"]
        start_time = time.time()

        # loop until we reach the desired number of timesteps
        while self.num_frames < total_timesteps:
            # Update model parameters

            update_start_time = time.time(
            )  # store the time (for fps calculations)
            exps, logs1 = self.algo.collect_experiences(
            )  # collect a number of data points for training
            logs2 = self.algo.update_parameters(
                exps)  # update the parameters based on the experiences
            logs = {**logs1, **logs2}  # merge the logs for printing
            update_end_time = time.time()

            self.num_frames += logs["num_frames"]
            self.update += 1

            # all of this messy stuff is just storing and printing the log info

            if self.update % log_interval == 0:
                fps = logs["num_frames"] / (update_end_time -
                                            update_start_time)
                duration = int(time.time() - start_time)
                return_per_episode = utils.synthesize(
                    logs["return_per_episode"])
                rreturn_per_episode = utils.synthesize(
                    logs["reshaped_return_per_episode"])
                num_frames_per_episode = utils.synthesize(
                    logs["num_frames_per_episode"])

                header = ["update", "frames", "FPS", "duration"]
                data = [self.update, self.num_frames, fps, duration]
                header += [
                    "rreturn_" + key for key in rreturn_per_episode.keys()
                ]
                data += rreturn_per_episode.values()
                header += [
                    "num_frames_" + key
                    for key in num_frames_per_episode.keys()
                ]
                data += num_frames_per_episode.values()
                header += [
                    "entropy", "value", "policy_loss", "value_loss",
                    "grad_norm"
                ]
                data += [
                    logs["entropy"], logs["value"], logs["policy_loss"],
                    logs["value_loss"], logs["grad_norm"]
                ]

                self.txt_logger.info(
                    "U {} | F {:06} | FPS {:04.0f} | D {} | rR:usmM {:.2f} {:.2f} {:.2f} {:.2f} | F:usmM {:.1f} {:.1f} {} {} | H {:.3f} | V {:.3f} | pL {:.3f} | vL {:.3f} | D {:.3f}"
                    .format(*data))

                header += [
                    "return_" + key for key in return_per_episode.keys()
                ]
                data += return_per_episode.values()

                if self.status["num_frames"] == 0:
                    self.csv_logger.writerow(header)
                self.csv_logger.writerow(data)
                self.csv_file.flush()

                for field, value in zip(header, data):
                    self.tb_writer.add_scalar(field, value, self.num_frames)

            # Save status

            if save_interval > 0 and self.update % save_interval == 0:
                self._save_training_info()
                if save_env_info:
                    for e in self.training_envs:
                        if hasattr(e, 'save_env_info'): e.save_env_info()

        self._clear_training_envs()

        return True

    def _save_training_info(self):
        """
        Function to save the training info.
        """

        # update the status dictionary
        self.status = {
            "num_frames": self.num_frames,
            "update": self.update,
            "model_state": self.acmodel.state_dict(),
            "optimizer_state": self.algo.optimizer.state_dict()
        }

        if hasattr(self.preprocess_obss,
                   "vocab"):  # if we are using NLP save, NLP info
            self.status["vocab"] = self.preprocess_obss.vocab.vocab

        utils.save_status(self.status,
                          self.model_dir)  # save the status info to model_dir
        self.txt_logger.info("Status saved")

    def _clear_training_envs(self):
        """
        Clear the training environments to free up memory.
        """

        # the termination set gets lost, so we need to store it again
        if hasattr(self.env, 'termination_set'):
            self.env.termination_set = [
                s for e in self.training_envs for s in e.termination_set
            ]

        # clear the env and the training envs
        self.algo.env = None
        self.training_envs = None

    def save(self, f):
        """
        Legacy function for saving the model.

        TODO: place the saving logic for the model here
        :param f:
        """
        print('self.save() - currently not implemented')

    def set_env(self, env):
        """
        Set the environment and clear the training environments

        :param env: environment for training/acting
        """
        # check to make sure the environment is the correct type
        assert isinstance(env, gym.Env)
        self.env = env
        self.training_envs = None

    def predict(self, obs, state=None, deterministic=False):
        """
        Wrapper for training code compatibility.  Calls get_action() to predict the action to take based on the
        current observation.

        :param obs: observation for predicting the action
        :param state: state of the LSTM (unused)
        :param deterministic: whether to use deterministic or probabilistic actions (unused)
        :return: action and LSTM state
        """
        # assert (state==None) and (deterministic==False) # still need to reimplement
        return self.get_action(
            obs
        ), None  # return action, states - states is unused at the moment

    def get_actions(self, obss):
        """
        Get a list of actions for a list of observations.



        :param obss: list of observations for predicting actions
        :return: list of actions for the associated observations
        """
        preprocessed_obss = self.preprocess_obss(obss, device=self.device)

        with torch.no_grad(
        ):  # don't calculate the gradients, since we are doing a forward pass
            if self.acmodel.recurrent:  # if we are using a recurrent model
                dist, _, self.memories = self.acmodel(preprocessed_obss,
                                                      self.memories)
            else:  # otherwise
                dist, _ = self.acmodel(preprocessed_obss)
                # preprocess the observations to put them in a torch-friendly format

        # the acmodel returns a probability distribution
        if self.argmax:  # if we are detemrinistic, take the action with the highest probability
            actions = dist.probs.max(1, keepdim=True)[1]
        else:  # otherwise sample the distribution to select the action
            actions = dist.sample()

        return actions.cpu().numpy()  # reaturn a numpy array, not a tensor

    def get_action(self, obs):
        """
        Wrapper for get_actions() to produce just a single action (rather than a list of actions) for acting.

        :param obs: single observation
        :return: single action
        """
        return self.get_actions([obs])[0]

    def analyze_feedbacks(self, rewards, dones):
        """
        rl-starter-files code.  Not sure what this does.

        :param rewards:
        :param dones:
        """
        if self.acmodel.recurrent:
            masks = 1 - torch.tensor(
                dones, dtype=torch.float, device=self.device).unsqueeze(1)
            self.memories *= masks

    def analyze_feedback(self, reward, done):
        """
        rl-starter-files code.  Not sure what this does (other than wrap analyze_feedbacks().

        :param reward:
        :param done:
        :return:
        """
        return self.analyze_feedbacks([reward], [done])

예제 #4

파일 보기

class Agent:
    """An agent.

    It is able:
    - to choose an action given an observation,
    - to analyze the feedback (i.e. reward and done state) of its action."""
    def __init__(self,
                 env,
                 obs_space,
                 action_space,
                 model_dir,
                 device=None,
                 argmax=False,
                 num_envs=1,
                 use_memory=False,
                 use_text=False):
        obs_space, self.preprocess_obs_goals = utils.get_obs_goals_preprocessor(
            obs_space)
        self.acmodel = ACModel(obs_space,
                               action_space,
                               use_memory=use_memory,
                               use_text=use_text)
        self.device = device
        self.argmax = argmax
        self.num_envs = num_envs

        status = utils.get_status(model_dir)

        self.goals = list(status['agent_goals'].values())
        # for goal in self.goals:
        #     goal = env.unwrapped.get_obs_render( goal, tile_size=32)
        #     plt.imshow(goal)
        #     plt.show()

        if self.acmodel.recurrent:
            self.memories = torch.zeros(self.num_envs,
                                        self.acmodel.memory_size,
                                        device=self.device)

        self.acmodel.load_state_dict(status["model_state"])
        self.acmodel.to(self.device)
        self.acmodel.eval()
        if hasattr(self.preprocess_obs_goals, "vocab"):
            self.preprocess_obs_goals.vocab.load_vocab(status["vocab"])

    def concat_obs_goal(self, obs):
        if 'image' in obs:
            obs_goals = [{
                "image":
                np.concatenate((obs["image"], self.goals[i]), axis=2),
                "mission":
                obs['mission']
            } for i in range(len(self.goals))]
        else:
            obs_goals = [
                np.concatenate((obs, self.goals[i]), axis=2)
                for i in range(len(self.goals))
            ]
        return obs_goals

    def get_actions(self, obss):
        actions = np.zeros(len(obss), dtype=int)

        for i in range(len(obss)):
            memory = self.memories[i]

            obs_goals = self.concat_obs_goal(obss[i])
            preprocessed_obs_goals = self.preprocess_obs_goals(
                obs_goals, device=self.device)

            with torch.no_grad():
                if self.acmodel.recurrent:
                    memory = torch.stack([memory] * len(self.goals), 0)
                    dists, values, memory = self.acmodel(
                        preprocessed_obs_goals, memory)
                else:
                    dists, values = self.acmodel(preprocessed_obs_goals)
            g = values.data.max(0)[1]
            print(values.data, g)
            if self.argmax:
                actions[i] = dists.probs.max(1,
                                             keepdim=True)[1][g].cpu().numpy()
            else:
                actions[i] = dists.sample()[g].cpu().numpy()

            if self.acmodel.recurrent:
                self.memories[i] = memory[g]

        return actions

    def reset(self):
        if self.acmodel.recurrent:
            self.memories = torch.zeros(self.num_envs,
                                        self.acmodel.memory_size,
                                        device=self.device)

    def get_action(self, obs):
        return self.get_actions([obs])[0]

    def analyze_feedbacks(self, rewards, dones):
        if self.acmodel.recurrent:
            masks = 1 - torch.tensor(
                dones, dtype=torch.float, device=self.device).unsqueeze(1)
            self.memories *= masks

    def analyze_feedback(self, reward, done):
        return self.analyze_feedbacks([reward], [done])

예제 #5

파일 보기

파일: agent.py 프로젝트: LTL2Action/LTL2Action

class Agent:
    """An agent.

    It is able:
    - to choose an action given an observation,
    - to analyze the feedback (i.e. reward and done state) of its action."""
    def __init__(self,
                 env,
                 obs_space,
                 action_space,
                 model_dir,
                 ignoreLTL,
                 progression_mode,
                 gnn,
                 recurrence=1,
                 dumb_ac=False,
                 device=None,
                 argmax=False,
                 num_envs=1):
        try:
            print(model_dir)
            status = utils.get_status(model_dir)
        except OSError:
            status = {"num_frames": 0, "update": 0}

        using_gnn = (gnn != "GRU" and gnn != "LSTM")
        obs_space, self.preprocess_obss = utils.get_obss_preprocessor(
            env, using_gnn, progression_mode)
        if "vocab" in status and self.preprocess_obss.vocab is not None:
            self.preprocess_obss.vocab.load_vocab(status["vocab"])

        if recurrence > 1:
            self.acmodel = RecurrentACModel(env, obs_space, action_space,
                                            ignoreLTL, gnn, dumb_ac, True)
            self.memories = torch.zeros(num_envs,
                                        self.acmodel.memory_size,
                                        device=device)
        else:
            self.acmodel = ACModel(env, obs_space, action_space, ignoreLTL,
                                   gnn, dumb_ac, True)

        self.device = device
        self.argmax = argmax
        self.num_envs = num_envs

        self.acmodel.load_state_dict(utils.get_model_state(model_dir))
        self.acmodel.to(self.device)
        self.acmodel.eval()

    def get_actions(self, obss):
        preprocessed_obss = self.preprocess_obss(obss, device=self.device)

        with torch.no_grad():
            if self.acmodel.recurrent:
                dist, _, self.memories = self.acmodel(preprocessed_obss,
                                                      self.memories)
            else:
                dist, _ = self.acmodel(preprocessed_obss)

        if self.argmax:
            actions = dist.probs.max(1, keepdim=True)[1]
        else:
            actions = dist.sample()

        return actions.cpu().numpy()

    def get_action(self, obs):
        return self.get_actions([obs])[0]

    def analyze_feedbacks(self, rewards, dones):
        if self.acmodel.recurrent:
            masks = 1 - torch.tensor(dones, dtype=torch.float).unsqueeze(1)
            self.memories *= masks

    def analyze_feedback(self, reward, done):
        return self.analyze_feedbacks([reward], [done])