import os
from collections import deque

import torch

# env_config, should_embed, RolloutStorage, and ExponentialSchedule are
# project-local helpers defined elsewhere in this repository.


def __init__(self, envs, args, device, rnn_state_size):
    self.device = device
    self.args = args
    self.envs = envs
    # Running window of the most recent episode returns, for logging.
    self.episode_rewards = deque(maxlen=100)
    self.config = env_config(args.env_name)
    self.is_embed = should_embed(args.env_name)
    self.rollouts = RolloutStorage(self.args, self.config, rnn_state_size,
                                   self.is_embed)
    # Seed the storage with the initial observation, then move all
    # rollout buffers to the training device.
    obs = self.envs.reset()
    self.rollouts.obs[0].copy_(obs)
    self.rollouts.to(self.device)
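# NOTE: RolloutStorage is project-local and not shown here. As a point of
# reference only, this is a minimal sketch of the surface these constructors
# rely on (the obs/state/belief buffers, the extra index-0 time slot, and
# .to()); every name, shape, and parameter below is an assumption, not the
# project's actual signature.
class _RolloutStorageSketch:
    def __init__(self, num_steps, num_processes, obs_dim, state_dim,
                 belief_dim):
        # One extra time slot so that index 0 can hold the post-reset values.
        self.obs = torch.zeros(num_steps + 1, num_processes, obs_dim)
        self.state = torch.zeros(num_steps + 1, num_processes, state_dim)
        self.belief = torch.zeros(num_steps + 1, num_processes, belief_dim)

    def to(self, device):
        # Move every buffer to the target device in place.
        self.obs = self.obs.to(device)
        self.state = self.state.to(device)
        self.belief = self.belief.to(device)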
def __init__(self, envs, args):
    self.args = args
    self.envs = envs
    assert self.args.policy_file is not None
    self.episode_rewards = deque(maxlen=100)
    self.config = env_config(args.env_name)
    self.is_embed = should_embed(args.env_name)
    self.actor_critic = torch.load(self.args.policy_file)
    # Evaluation acts one step at a time.
    self.args.num_steps = 1
    self.rollouts = RolloutStorage(self.args, self.config,
                                   self.actor_critic.rnn_state_size,
                                   self.is_embed)
    # Turn this on to save reconstructed beliefs to text files.
    self.log_est_belief = False
    self.traj_cnt = 0
    if self.log_est_belief:
        self.actor_belief_file = open('ab' + str(self.traj_cnt) + '.txt', 'w')
        self.critic_belief_file = open('cb' + str(self.traj_cnt) + '.txt', 'w')
    # Seed the storage with the initial observation, ground-truth state,
    # and belief; evaluation rollouts stay on the CPU.
    obs = self.envs.reset()
    state = self.envs.get_state()
    belief = self.envs.get_belief()
    self.rollouts.obs[0].copy_(obs)
    self.rollouts.state[0].copy_(state)
    self.rollouts.belief[0].copy_(belief)
    self.rollouts.to('cpu')
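# NOTE: `envs` is assumed to be a vectorized environment wrapper that, in
# addition to the usual reset/step, exposes the simulator's ground-truth
# state and the filtered belief of the underlying POMDP. A hypothetical
# minimal interface consistent with the calls above (names and docstrings
# are assumptions):
class _BeliefEnvSketch:
    def reset(self):
        """Return the initial observation batch, one row per process."""
        raise NotImplementedError

    def get_state(self):
        """Return the current ground-truth state batch."""
        raise NotImplementedError

    def get_belief(self):
        """Return the current belief batch (a distribution over states)."""
        raise NotImplementedError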
def __init__(self, envs, args, log_dir):
    self.device = args.device
    self.args = args
    self.envs = envs
    self.episode_rewards = deque(maxlen=100)
    self.config = env_config(args.env_name)
    num_updates = (int(args.num_env_steps) // args.num_steps
                   // args.num_processes)
    if self.args.use_linear_entropy_decay:
        # Anneal the entropy bonus over training (an exponential schedule,
        # despite the flag's 'linear' name).
        self.entropy_schedule = ExponentialSchedule(
            args.entropy_coef, 2e-5, num_updates)
    self.should_embed = should_embed(args.env_name)
    self.experience_mem = []
    # When the belief loss is active, tag the saved files with '.b' so the
    # two saved-file formats do not collide.
    if self.args.algo == 'ah-ch' and self.args.belief_loss_coef > 0.0:
        self.model_path = os.path.join(
            log_dir,
            self.args.algo + '.b' + '.' + str(self.args.seed) + '.mdl')
        self.transitions_path = os.path.join(
            log_dir,
            self.args.env_name + '.' + self.args.algo + '.b' + '.'
            + str(self.args.seed) + '.exp')
    else:
        self.model_path = os.path.join(
            log_dir, self.args.algo + '.' + str(self.args.seed) + '.mdl')
        self.transitions_path = os.path.join(
            log_dir,
            self.args.env_name + '.' + self.args.algo + '.'
            + str(self.args.seed) + '.exp')
    self.setup_agent()
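# NOTE: ExponentialSchedule is defined elsewhere in the repository; only its
# constructor call appears above. The sketch below is a hypothetical
# stand-in, assuming it decays a coefficient geometrically from `initial`
# to `final` over `num_updates` (the method name and the exact decay law
# are assumptions).
class _ExponentialScheduleSketch:
    def __init__(self, initial, final, num_updates):
        self.initial = initial
        self.final = final
        # Per-update multiplicative factor chosen so that the value reaches
        # `final` after `num_updates` updates.
        self.rate = (final / initial) ** (1.0 / num_updates)

    def value(self, update):
        # Geometric decay, clamped so it never drops below `final`.
        return max(self.final, self.initial * self.rate ** update)

# Example: _ExponentialScheduleSketch(0.01, 2e-5, 10000).value(0) == 0.01,
# and .value(10000) is approximately 2e-5.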