Example #1
    def __init__(self, envs, args, device, rnn_state_size):
        self.device = device
        self.args = args
        self.envs = envs

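        # Sliding window of the most recent 100 episode returns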
        self.episode_rewards = deque(maxlen=100)

        self.config = env_config(args.env_name)

        self.is_embed = should_embed(args.env_name)

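        # Pre-allocate rollout buffers; rnn_state_size sizes the recurrent
        # hidden-state storage, is_embed tells it whether observations are embedded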
        self.rollouts = RolloutStorage(self.args, self.config, rnn_state_size,
                                       self.is_embed)
        obs = self.envs.reset()

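        # Record the initial observation and move all buffers to the device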
        self.rollouts.obs[0].copy_(obs)
        self.rollouts.to(self.device)
Example #2
    def __init__(self, envs, args):
        self.args = args
        self.envs = envs

        # A pretrained policy checkpoint is required for evaluation
        assert self.args.policy_file is not None

        self.episode_rewards = deque(maxlen=100)

        self.config = env_config(args.env_name)

        self.is_embed = should_embed(args.env_name)

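        # Load the pretrained actor-critic module saved with torch.save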
        self.actor_critic = torch.load(self.args.policy_file)

        # Evaluation acts one step at a time
        self.args.num_steps = 1

        self.rollouts = RolloutStorage(self.args, self.config,
                                       self.actor_critic.rnn_state_size,
                                       self.is_embed)

        # Turn this on to save reconstructed beliefs to text files
        self.log_est_belief = False
        self.traj_cnt = 0

        if self.log_est_belief:
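            # One 'ab<k>.txt' / 'cb<k>.txt' pair per trajectory, holding the
            # actor's and critic's belief estimates respectively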
            self.actor_belief_file = open(f'ab{self.traj_cnt}.txt', 'w')
            self.critic_belief_file = open(f'cb{self.traj_cnt}.txt', 'w')

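        # Reset and capture the initial observation, ground-truth state, and belief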
        obs = self.envs.reset()
        state = self.envs.get_state()
        belief = self.envs.get_belief()

        self.rollouts.obs[0].copy_(obs)
        self.rollouts.state[0].copy_(state)
        self.rollouts.belief[0].copy_(belief)

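        # Evaluation keeps the rollout buffers on the CPU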
        self.rollouts.to('cpu')
Example #3
    def __init__(self, envs, args, log_dir):
        self.device = args.device
        self.args = args
        self.envs = envs

        self.episode_rewards = deque(maxlen=100)

        self.config = env_config(args.env_name)

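        # Number of update iterations implied by the total env-step budget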
        num_updates = int(
            args.num_env_steps) // args.num_steps // args.num_processes

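        # Anneal the entropy coefficient down to 2e-5 over training; note the
        # schedule is exponential despite the flag's 'linear' name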
        if self.args.use_linear_entropy_decay:
            self.entropy_schedule = ExponentialSchedule(
                args.entropy_coef, 2e-5, num_updates)

        self.should_embed = should_embed(args.env_name)

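        # In-memory store for collected experience transitions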
        self.experience_mem = []

        # When the belief loss is active, tag the saved files with '.b' so the
        # two variants do not overwrite each other
        if self.args.algo == 'ah-ch' and self.args.belief_loss_coef > 0.0:
            self.model_path = os.path.join(
                log_dir, f'{self.args.algo}.b.{self.args.seed}.mdl')
            self.transitions_path = os.path.join(
                log_dir,
                f'{self.args.env_name}.{self.args.algo}.b.{self.args.seed}.exp')
        else:
            self.model_path = os.path.join(
                log_dir, f'{self.args.algo}.{self.args.seed}.mdl')
            self.transitions_path = os.path.join(
                log_dir,
                f'{self.args.env_name}.{self.args.algo}.{self.args.seed}.exp')

        # Build the agent (networks, optimizer, etc.)
        self.setup_agent()