def create_output_distribution(action_space, output_size):
    """Build the action-distribution head that matches ``action_space``.

    The space is dispatched on the name of its class. A "Tuple" space is
    iterated and every member is handled by a recursive call.

    Args:
        action_space: Space object. ``.n`` is read for "Discrete" and
            ``.shape[0]`` for "Box", "MultiBinary" and "MultiDiscrete".
        output_size: Passed as the first argument to every distribution
            constructor.

    Returns:
        A ``Categorical``, ``DiagGaussian``, ``Bernoulli`` or
        ``DistributionGeneratorTuple`` instance.

    Raises:
        NotImplementedError: If the class name of the space is not handled.
            The message names the offending class so that a failure deep
            inside a nested Tuple space can be located.
    """
    space_kind = action_space.__class__.__name__

    if space_kind == "Tuple":
        # One head per member space, wrapped into a single generator object.
        return DistributionGeneratorTuple(
            tuple(
                create_output_distribution(space, output_size)
                for space in action_space
            )
        )
    if space_kind == "Discrete":
        return Categorical(output_size, action_space.n)
    if space_kind in ("Box", "MultiDiscrete"):
        # NOTE(review): MultiDiscrete is given the same Gaussian head as Box,
        # exactly as in the original code -- confirm this is intended.
        return DiagGaussian(output_size, action_space.shape[0])
    if space_kind == "MultiBinary":
        return Bernoulli(output_size, action_space.shape[0])

    raise NotImplementedError(
        "Unsupported action space type: %r" % (space_kind,)
    )
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Build the feature base, bookkeeping fields and the distribution head.

    Args:
        obs_shape: Observation shape. Its length picks the default base
            (3 -> CNNBase, 1 -> MLPBase) and ``obs_shape[0]`` is passed to
            the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: Optional base class/factory; chosen from ``obs_shape`` when
            None.
        base_kwargs: Optional keyword arguments forwarded to ``base``.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}
    if base is None:
        if len(obs_shape) == 3:
            base = CNNBase
        elif len(obs_shape) == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    # Bookkeeping fields; how they are used is not visible in this block.
    self.tt = 0
    self.nn = 0
    self.visionmodel = None

    # The original called .cuda() unconditionally, which raises on hosts
    # without CUDA. Use the GPU when present, otherwise stay on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.knob_target_hist = torch.zeros(1, 3, device=device)

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif space_kind == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Build the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. Its length picks the default base
            (3 -> CNNBase, 1 -> MLPBase) and ``obs_shape[0]`` is passed to
            the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: Optional base class/factory; chosen from ``obs_shape`` when
            None.
        base_kwargs: Optional keyword arguments forwarded to ``base``. An
            ``'is_genesis'`` entry is forwarded only when it is truthy. The
            caller's dict is never modified.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    # Work on a private copy: the key removal below must not leak into the
    # dict owned by the caller.
    base_kwargs = {} if base_kwargs is None else dict(base_kwargs)
    if base is None:
        if len(obs_shape) == 3:
            base = CNNBase
        elif len(obs_shape) == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    # This key is needed to configure the CNN correctly for Genesis, but
    # causes problems with other domains. A missing key is treated like a
    # falsy one (the original raised KeyError when it was absent, including
    # on the default ``base_kwargs=None`` path).
    if not base_kwargs.get('is_genesis', False):
        base_kwargs.pop('is_genesis', None)

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif space_kind == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Build the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. Its length picks the default base
            (3 -> CNNBase, 1 -> MLPBase) and ``obs_shape[0]`` is passed to
            the base constructor.
        action_space: "Discrete", "Box" and "MultiBinary" are recognised by
            class name; ``gym.spaces.MultiDiscrete`` is recognised with
            ``isinstance``.
        base: Optional base class/factory; chosen from ``obs_shape`` when
            None.
        base_kwargs: Optional keyword arguments forwarded to ``base``.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is None:
        rank = len(obs_shape)
        if rank == 3:
            base = CNNBase
        elif rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError
    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(self.base.output_size, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(self.base.output_size, action_space.shape[0])
    elif space_kind == "MultiBinary":
        self.dist = Bernoulli(self.base.output_size, action_space.shape[0])
    elif isinstance(action_space, gym.spaces.MultiDiscrete):
        # The head receives the summed nvec entries as its output count and
        # nvec itself as a third argument.
        self.dist = MultiCategoricalDistribution(
            self.base.output_size,
            int(np.sum(action_space.nvec)),
            action_space.nvec,
        )
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, is_leaf, base=None, base_kwargs=None):
    """Create the OpsBase trunk and the matching action-distribution head.

    Args:
        obs_shape: Only ``obs_shape[0]`` is read; it is the first argument
            given to ``OpsBase``.
        action_space: Forwarded to ``OpsBase`` and also used, via its class
            name, to choose the distribution head.
        is_leaf: Forwarded to ``OpsBase`` as a keyword argument.
        base: Accepted but not used by this constructor.
        base_kwargs: Optional extra keyword arguments for ``OpsBase``.

    Raises:
        NotImplementedError: If the action space is not "Discrete", "Box" or
            "MultiBinary".
    """
    super(OpsPolicy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs
    self.base = OpsBase(obs_shape[0], action_space, is_leaf=is_leaf, **extra)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.base.output_size, num_outputs)
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, num_agents=1):
    """Build one shared base with a Categorical head and critic per agent.

    Args:
        obs_shape: Observation shape. A default base (CNNBase) exists only
            when it has three entries; ``obs_shape[2]`` is passed to the
            base constructor.
        action_space: Its ``n`` attribute is the output count of every
            Categorical head.
        base: Optional base class/factory.
        base_kwargs: Optional keyword arguments forwarded to ``base``.
        num_agents: Number of (head, critic) pairs to create.

    Raises:
        NotImplementedError: If ``base`` is None and ``obs_shape`` does not
            have three entries.
    """
    super(PolicyShareBase, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    if base is None:
        if len(obs_shape) != 3:
            raise NotImplementedError
        base = CNNBase
    self.base = base(obs_shape[2], **extra)

    num_outputs = action_space.n
    heads = []
    for _ in range(num_agents):
        heads.append(Categorical(self.base.output_size, num_outputs))
    self.dists = nn.ModuleList(heads)

    def init_(module):
        # Orthogonal weights, zero bias.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0))

    critics = []
    for _ in range(num_agents):
        critics.append(init_(nn.Linear(self.base.output_size, 1)))
    self.critic_linears = nn.ModuleList(critics)

    self.num_agents = num_agents
def __init__(self, obs_shape, action_space, agent_num, agent_i, base=None, base_kwargs=None):
    """Build a base that is told the agent count, plus the distribution head.

    Args:
        obs_shape: Observation shape. Its length picks the default base
            (3 -> CNNBase, 1 -> MLPBase) and ``obs_shape[0]`` is passed to
            the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        agent_num: Passed to the base as its second positional argument.
        agent_i: Accepted but not used by this constructor.
        base: Optional base class/factory; chosen from ``obs_shape`` when
            None.
        base_kwargs: Optional keyword arguments forwarded to ``base``.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    if base is None:
        rank = len(obs_shape)
        if rank == 3:
            base = CNNBase
        elif rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    # Original author's note: the actor input dimension is num_state, the
    # critic input dimension is num_state * agent_num.
    self.base = base(obs_shape[0], agent_num, **extra)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.base.output_size, num_outputs)
def __init__(self, coord_size, input_size=(1, 1), action_space=1, hidden_size=1, window_size=1, action_embedding=0):
    """Create the network, the coordinate-wise action head and the critic.

    Args:
        coord_size: Passed to ``BasicNet`` and ``Categorical``; also scales
            the critic input width.
        input_size: Pair described by the original comment as
            ``(#lstm_input, #mlp_input)``.
        action_space: Output count given to the Categorical head.
        hidden_size: Hidden size of the network; also stored as
            ``recurrent_hidden_state_size``.
        window_size: Forwarded to ``BasicNet``.
        action_embedding: Added to ``input_size[0]`` for the network input
            only.
    """
    super().__init__()
    lstm_inputs = input_size[0]
    mlp_inputs = input_size[1]
    # Width of the per-coordinate feature vector fed to the heads.
    feature_dim = lstm_inputs * hidden_size + mlp_inputs + 1

    # TODO: should change "batch_size" to coord_size
    self.net = BasicNet(
        coord_size,
        input_size=(lstm_inputs + action_embedding, mlp_inputs),
        hidden_size=hidden_size,
        window_size=window_size,
    )
    # Returns distributions coordinate-wise.
    self.action_distribution = Categorical(
        feature_dim, action_space, coord_size=coord_size)
    self.critic = CriticHead(coord_size * feature_dim)

    self.recurrent_hidden_state_size = hidden_size
    self.coord_size = coord_size
    self.input_size = input_size
    self.action_space = action_space
    self.hidden_size = hidden_size
    self.window_size = window_size
    self.action_embedding_size = action_embedding
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Build the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. Its length picks the default base
            (3 -> CNNBase, 1 -> MLPBase). The whole shape, not just its
            first entry, is passed to the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: Optional base class/factory. The string ``'Mnist'`` selects
            ``CNNMnist``; None selects a default from ``obs_shape``.
        base_kwargs: Optional keyword arguments forwarded to ``base``.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    if base == 'Mnist':
        base = CNNMnist
    if base is None:
        rank = len(obs_shape)
        if rank == 3:
            base = CNNBase
        elif rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    # TODO (from the original): this was changed to pass the full
    # obs_shape; the earlier version passed obs_shape[0].
    self.base = base(obs_shape, **extra)
    print("obs_shape[0]:", obs_shape[0])

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.base.output_size, num_outputs)
def __init__(self, obs_space, obs_process, obs_module, action_space, base_kwargs=None):
    """Store the observation config and build base, critic and head.

    Args:
        obs_space: Stored on the instance and forwarded to ``NNBase2``.
        obs_process: Stored on the instance and forwarded to ``NNBase2``.
        obs_module: Stored on the instance and forwarded to ``NNBase2``.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base_kwargs: Optional keyword arguments forwarded to ``NNBase2``.

    Raises:
        NotImplementedError: If the action space type is not handled.
    """
    super(Policy, self).__init__()
    self.obs_space = obs_space
    self.obs_process = obs_process
    self.obs_module = obs_module

    extra = {} if base_kwargs is None else base_kwargs
    # Per the original comment: the base takes all of the observations and
    # produces a single feature vector.
    self.base = NNBase2(obs_space, obs_process, obs_module, **extra)

    def init_(module):
        # Orthogonal weights, zero bias.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0))

    self.critic_linear = init_(nn.Linear(self.base.output_size, 1))

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.base.output_size, num_outputs)
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Build the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. With three entries the last entry
            picks the default base (84 -> CNNBase, 64 -> CNNBase64); with
            one entry the default is MLPBase. ``obs_shape[0]`` is passed to
            the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: Optional base class/factory; chosen from ``obs_shape`` when
            None.
        base_kwargs: Optional keyword arguments forwarded to ``base``.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    if base is None:
        rank = len(obs_shape)
        if rank == 3:
            last_dim = obs_shape[-1]
            if last_dim == 84:
                base = CNNBase
            elif last_dim == 64:
                base = CNNBase64
            else:
                raise NotImplementedError
        elif rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError
    self.base = base(obs_shape[0], **extra)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.base.output_size, num_outputs)
def __init__(self, obs_shape, action_space, action_activation=None, base=None, base_kwargs=None):
    """Build a base that is given an action-embedding function, plus the head.

    Args:
        obs_shape: Observation shape. Its length picks the default base
            (3 -> CNNBase, 1 -> MLPBase); ``obs_shape[0]`` is stored as
            ``ob_dim`` and passed to the base constructor.
        action_space: "Discrete" or "Box" (matched by class name).
        action_activation: Forwarded to ``DiagGaussian`` as ``activation``
            for "Box" spaces.
        base: Optional base class/factory; chosen from ``obs_shape`` when
            None.
        base_kwargs: Keyword arguments for ``base``. ``"hidden_size"`` is
            always read, and ``"is_ref"`` is read for "Discrete" spaces, so
            those keys must be present. ``"num_actions"`` and
            ``"action_embedding"`` are added to a private copy; the
            caller's dict is never modified.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space is anything other than "Discrete" or "Box"
            (this includes "MultiBinary").
        KeyError: If a required ``base_kwargs`` key is missing.
    """
    super(Policy, self).__init__()
    # Copy so the two keys injected below do not leak into the caller's dict.
    base_kwargs = {} if base_kwargs is None else dict(base_kwargs)

    if base is None:
        if len(obs_shape) == 3:
            base = CNNBase
        elif len(obs_shape) == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n

        def action_embedding(actions):
            # One-hot encode, then drop the second-to-last axis.
            return torch.nn.functional.one_hot(
                actions, num_classes=num_outputs).squeeze(-2)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]

        def action_embedding(actions):
            # Actions are passed through unchanged.
            return actions
    else:
        # "MultiBinary" was explicitly rejected here by the original too,
        # which made its later Bernoulli branch unreachable.
        raise NotImplementedError

    base_kwargs["num_actions"] = num_outputs
    base_kwargs["action_embedding"] = action_embedding

    self.ob_dim = obs_shape[0]
    self.h_dim = base_kwargs["hidden_size"]
    self.ac_dim = num_outputs

    self.base = base(obs_shape[0], **base_kwargs)

    if space_kind == "Discrete":
        self.dist = Categorical(self.base.output_size,
                                num_outputs,
                                is_ref=base_kwargs["is_ref"])
    else:  # "Box" -- the only other kind that reaches this point
        self.dist = DiagGaussian(self.base.output_size,
                                 num_outputs,
                                 activation=action_activation)

    self.obs_rms = None
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, is_minigrid=False, use_rew=False):
    """Build the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. With three entries the default base
            is ``CNN_minigrid`` when ``is_minigrid`` is truthy and
            ``CNNBase`` otherwise; with one entry it is ``MLPBase``.
            ``obs_shape[0]`` is passed to the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: Optional base class/factory; chosen from ``obs_shape`` when
            None.
        base_kwargs: Optional keyword arguments forwarded to ``base``.
        is_minigrid: Only consulted when choosing the default base.
        use_rew: Stored on the instance as ``use_rew``.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    if base is None:
        rank = len(obs_shape)
        if rank == 3:
            base = CNN_minigrid if is_minigrid else CNNBase
        elif rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError
    self.base = base(obs_shape[0], **extra)
    self.use_rew = use_rew

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.base.output_size, num_outputs)
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Build the distribution head and a base sized from action + observation.

    The base input width is the action width plus twice ``obs_shape[0]``.
    The head always takes ``self.hidden_size`` (256) as its input width.

    Args:
        obs_shape: Only ``obs_shape[0]`` is read.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the head and the action width.
        base: Optional base class/factory; ``MLPBase`` when None. It is
            called as ``base(num_inputs, num_outputs)``.
        base_kwargs: Accepted but not forwarded to ``base``.

    Raises:
        NotImplementedError: If the action space type is not handled.
    """
    super(APolicy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}
    if base is None:
        base = MLPBase

    self.hidden_size = 256

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError

    num_inputs = num_outputs + obs_shape[0] * 2
    self.dist = head_cls(self.hidden_size, num_outputs)
    self.base = base(num_inputs, num_outputs)
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, key_value = 0):
    """Build the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. Its length picks the default base
            (3 -> CNNBase, 1 -> MLPBase) and ``obs_shape[0]`` is passed to
            the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: Optional base class/factory; ignored when ``key_value == 1``.
        base_kwargs: Optional keyword arguments forwarded to the base.
        key_value: When equal to 1, ``KeyValueBase`` is used regardless of
            ``base``.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    if key_value == 1:
        base = KeyValueBase
    elif base is None:
        rank = len(obs_shape)
        if rank == 3:
            # Original note: swap CNNBase for RLINE here if RLINE is wanted.
            base = CNNBase
        elif rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError
    self.base = base(obs_shape[0], **extra)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.base.output_size, num_outputs)
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Attach an already-built base, then create the head and the critic.

    Args:
        obs_shape: Accepted but not read by this constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: Object stored as ``self.base`` without being called; its
            ``output_size`` attribute sizes the head.
        base_kwargs: Accepted but not forwarded anywhere.

    Raises:
        NotImplementedError: If the action space type is not handled.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}

    # The base is used as given; it is not instantiated here.
    self.base = base

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.base.output_size, num_outputs)

    def init_(module):
        # Orthogonal weights, zero bias.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0))

    # The critic input width is fixed at 512.
    self.critic_linear = init_(nn.Linear(512, 1))
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, base_encoder='simple'):
    """Build the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. With three entries the default base
            is ``CNNBase``; with one entry it is chosen by ``base_encoder``.
            ``obs_shape[0]`` is passed to the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: Optional base class/factory; chosen as described above when
            None.
        base_kwargs: Optional keyword arguments forwarded to ``base``.
        base_encoder: Used only for one-entry ``obs_shape`` with
            ``base=None``. The first matching substring wins, in this
            order: ``'mlp'``, ``'deep'``, ``'attn'``, ``'special'``,
            ``'MHSA'``. The default ``'simple'`` contains none of them.

    Raises:
        NotImplementedError: If no default base can be chosen (unsupported
            ``obs_shape`` length or unrecognised ``base_encoder``), or the
            action space type is not handled.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}

    if base is None:
        if len(obs_shape) == 3:
            base = CNNBase
        elif len(obs_shape) == 1:
            if 'mlp' in base_encoder:
                base = MLPBase
            elif 'deep' in base_encoder:
                base = DeepMLPBase
            elif 'attn' in base_encoder:
                base = AttnMLP
            elif 'special' in base_encoder:
                base = SpecialMLP
            elif 'MHSA' in base_encoder:
                base = MHeadAttnModel
            else:
                # The original fell through with base still None and then
                # failed with "'NoneType' object is not callable".
                raise NotImplementedError(
                    "Unrecognised base_encoder %r" % (base_encoder,))
        else:
            raise NotImplementedError

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif space_kind == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Wrap the chosen base in an Adaptor and create critic and head.

    Args:
        obs_shape: Observation shape. Its length picks the default base
            (3 -> CNNBase, 1 -> MLPBase); the whole shape is forwarded to
            ``Adaptor``.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: Optional base class/factory handed to ``Adaptor``.
        base_kwargs: Optional extra keyword arguments for ``Adaptor``.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    if base is None:
        rank = len(obs_shape)
        if rank == 3:
            base = CNNBase
        elif rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    self.base = Adaptor(base=base, obs_shape=obs_shape, n_classes=512, **extra)
    # The critic layer is attached to the adaptor itself, not to self.
    self.base.critic_linear = nn.Linear(self.base.output_size, 1)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.base.output_size, num_outputs)
def __init__(self, action_space, base=None, base_kwargs=None):
    """Build an MLPBase and a fixed pair of distribution heads.

    Args:
        action_space: Accepted but not read by this constructor.
        base: Accepted but ignored; ``MLPBase`` is always used.
        base_kwargs: Optional keyword arguments forwarded to ``MLPBase``.
    """
    super(Policy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    # The observation width comes from get_v, not from an argument.
    self.base = MLPBase(get_v('obs_size'), **extra)

    # Two heads, each with two outputs: a Gaussian one, then a Categorical.
    gaussian_head = DiagGaussian(self.base.output_size, 2)
    categorical_head = Categorical(self.base.output_size, 2)
    self.dist = nn.ModuleList([gaussian_head, categorical_head])
def __init__(self, obs_shape, action_space, hidden_size):
    """Create the distribution head and three linear layers.

    Args:
        obs_shape: ``obs_shape[0]`` is the input width of ``self.base``.
        action_space: "Discrete", "Box" or "MultiBinary" (matched by class
            name) selects the head. For any other type no ``dist``
            attribute is set and no error is raised.
        hidden_size: Output width of ``self.base`` and input width of the
            value and action heads.
    """
    super(Policy, self).__init__()

    space_kind = action_space.__class__.__name__
    head_cls = None
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    if head_cls is not None:
        # The head's input and output widths are both num_outputs.
        self.dist = head_cls(num_outputs, num_outputs)

    self.base = nn.Linear(obs_shape[0], hidden_size)
    self.value_head = nn.Linear(hidden_size, 1)
    self.action_head = nn.Linear(hidden_size, 2)
def __init__(self, occ_obs_shape, sign_obs_shape, state_rep, action_space, recurrent_policy):
    """Choose the base from ``state_rep`` and attach a Categorical head.

    Args:
        occ_obs_shape: Passed to ``CNNBase`` when ``state_rep == 'full'``;
            otherwise unused.
        sign_obs_shape: Passed to whichever base is built.
        state_rep: ``'sign'`` or ``'original'`` builds an ``MLPBase``;
            ``'full'`` builds a ``CNNBase``.
        action_space: Its ``n`` attribute is the output count of the head.
        recurrent_policy: Passed to whichever base is built.

    Raises:
        NotImplementedError: If ``state_rep`` is not one of the three
            supported values.
    """
    super(Policy, self).__init__()

    if state_rep in ['sign', 'original']:
        self.base = MLPBase(sign_obs_shape, recurrent_policy)
    elif state_rep == 'full':
        self.base = CNNBase(occ_obs_shape, sign_obs_shape, recurrent_policy)
    else:
        # The original raised ``NotImplemented(...)``. NotImplemented is a
        # non-callable constant, so that line produced a TypeError instead
        # of the intended exception and message.
        raise NotImplementedError(
            'Only implemented sign, origianal, and full state representation'
        )

    num_outputs = action_space.n  # original note: 2
    self.dist = Categorical(self.base.output_size, num_outputs)
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Attach an already-built base and an argument-less Categorical.

    Args:
        obs_shape: Accepted but not read by this constructor.
        action_space: Accepted but not read by this constructor.
        base: Object stored as ``self.base`` without being called; its
            ``train()`` method is invoked immediately.
        base_kwargs: Accepted but not forwarded anywhere.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    # The base is used as given; it is not instantiated here.
    self.base = base
    self.base.train()

    self.dist = Categorical()
def __init__(self, obs_shape, action_space, other_cars=False, ego_dim=None, beta_dist=False, base=None, base_kwargs=None):
    """Build the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. Its length picks the default base
            (3 -> CNNBase, 1 -> MLPBase) and ``obs_shape[0]`` is passed to
            the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        other_cars: Forwarded to the base as a keyword argument.
        ego_dim: Forwarded to the base as a keyword argument.
        beta_dist: For "Box" spaces only: truthy selects ``BetaDist`` and
            sets ``entropy_lb``; falsy selects ``DiagGaussian`` and stores
            the space bounds as ``hi_lim`` / ``lo_lim``.
        base: Optional base class/factory; chosen from ``obs_shape`` when
            None.
        base_kwargs: Optional keyword arguments forwarded to ``base``.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    if base is None:
        rank = len(obs_shape)
        if rank == 3:
            base = CNNBase
        elif rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError
    self.base = base(obs_shape[0],
                     other_cars=other_cars,
                     ego_dim=ego_dim,
                     **extra)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(self.base.output_size, action_space.n)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        # beta_dist is stored only on this path.
        self.beta_dist = beta_dist
        if not self.beta_dist:
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
            self.hi_lim = action_space.high
            self.lo_lim = action_space.low
        else:
            self.dist = BetaDist(self.base.output_size, num_outputs)
            # Entropy of Beta(20, 20), stored as entropy_lb.
            reference_entropy = torch.distributions.Beta(20, 20).entropy()
            self.entropy_lb = Variable(reference_entropy.float())
    elif space_kind == "MultiBinary":
        self.dist = Bernoulli(self.base.output_size, action_space.shape[0])
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, navi=False, hidden_size=64, n_layers=2):
    """Build the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. Its length picks the default base
            (3 -> CNNBase, 1 -> MLPBase) and ``obs_shape[0]`` is passed to
            the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: Optional base class/factory; chosen from ``obs_shape`` when
            None.
        base_kwargs: Optional keyword arguments forwarded to ``base``.
        navi: For "Discrete" spaces only: when truthy the head input width
            is ``256 * 10`` instead of ``self.base.output_size``.
        hidden_size: Forwarded to ``base`` as a keyword argument.
        n_layers: Forwarded to ``base`` as a keyword argument.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}

    if base is None:
        if len(obs_shape) == 3:
            # TODO(add hidden size)
            base = CNNBase
        elif len(obs_shape) == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    # ``base`` is a class here, so ``type(base).__name__`` reported the
    # metaclass ("type") rather than the chosen base. Fall back to the old
    # expression for callables that have no ``__name__``.
    print("DEV: PPO using base:",
          getattr(base, "__name__", type(base).__name__))

    self.base = base(obs_shape[0],
                     hidden_size=hidden_size,
                     n_layers=n_layers,
                     **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        net_outputs = self.base.output_size
        if navi:
            net_outputs = 256 * 10
        self.dist = Categorical(net_outputs, num_outputs)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif space_kind == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, IAM=False, RNN=False, base_kwargs=None):
    """Pick a base from the flags, then build it and the distribution head.

    Each choice is announced with ``print``.

    Args:
        obs_shape: Observation shape. With three entries the base is
            ``IAMBaseCNN`` or ``CNNBase``; with one entry it is ``IAMBase``,
            ``RNNBase`` or ``MLPBase``. ``obs_shape[0]`` is passed to the
            base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        IAM: Stored as ``self.IAM``; takes precedence over ``RNN`` when
            choosing the base.
        RNN: Stored as ``self.recurrent``; only consulted for one-entry
            ``obs_shape`` when ``IAM`` is falsy.
        base_kwargs: Optional keyword arguments forwarded to the base.

    Raises:
        NotImplementedError: If ``obs_shape`` has neither one nor three
            entries, or the action space type is not handled.
    """
    super(IAMPolicy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs
    self.IAM = IAM
    self.recurrent = RNN

    rank = len(obs_shape)
    if rank not in (3, 1):
        raise NotImplementedError
    if rank == 3:
        if self.IAM:
            print("Using IAMBaseCNN")
            base = IAMBaseCNN
        else:
            print("Using CNNBase")
            base = CNNBase
    elif self.IAM:
        print("Using IAMBase")
        base = IAMBase
    elif self.recurrent:
        print("Using RNNBase")
        base = RNNBase
    else:
        print("Using MLPBase")
        base = MLPBase
    self.base = base(obs_shape[0], **extra)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        print("discrete")
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        print("Box")
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        print("MultiBinary")
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    # In this class output_size is called as a method.
    self.dist = head_cls(self.base.output_size(), num_outputs)
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Build a string-selected base and the action-distribution head.

    Args:
        obs_shape: ``obs_shape[0] * 2`` is passed to the base constructor
            (the original comments say the previous observation is added to
            the input).
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        base: ``'mlp'``, ``'shared'`` or ``'osc'``, selecting ``MLPBase``,
            ``SharedBase`` or ``OscBase`` respectively.
        base_kwargs: Optional keyword arguments forwarded to the base.

    Raises:
        NotImplementedError: If ``base`` is not one of the three strings or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    if base == 'mlp':
        base_cls = MLPBase
    elif base == 'shared':
        base_cls = SharedBase
    elif base == 'osc':
        # Original note: here the previous observation includes sim time.
        base_cls = OscBase
    else:
        raise NotImplementedError
    # Doubled width: previous observation is added to the input.
    self.base = base_cls(obs_shape[0] * 2, **extra)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.base.output_size, num_outputs)
def __init__(self, obs_shape, action_space, architecture, state_channels, hidden_size, recurse_depth=1, pool_inject=False, **kwargs):
    """Run the parent constructor, then add the ``cog`` critic and actor.

    All arguments are forwarded unchanged to the parent ``__init__``. Two
    modules sized by ``self.hidden_size`` are then added and the module is
    switched to training mode.

    Args:
        obs_shape: Forwarded to the parent constructor.
        action_space: Forwarded to the parent constructor.
        architecture: Forwarded to the parent constructor.
        state_channels: Forwarded to the parent constructor.
        hidden_size: Forwarded to the parent constructor.
        recurse_depth: Forwarded to the parent constructor.
        pool_inject: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """
    super(PolicyNetwork2AM, self).__init__(
        obs_shape,
        action_space,
        architecture,
        state_channels,
        hidden_size,
        recurse_depth=recurse_depth,
        pool_inject=pool_inject,
        **kwargs
    )

    def init_(module):
        # Orthogonal weights, zero bias.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0))

    self.critic_cog = init_(nn.Linear(self.hidden_size, 1))
    # Two-way categorical head.
    self.actor_cog = Categorical(self.hidden_size, 2)

    self.train()
def __init__(self, obs_shape, action_space, zero_last_layer=False, base=None, base_kwargs=None, dist=None):
    """Build or adopt a base, create the default head, optionally replace it.

    Args:
        obs_shape: Observation shape, used only when ``base`` is None. If
            its first entry is a tuple the base is ``E2EBase``; otherwise
            three entries give ``CNNBase`` and one entry gives ``MLPBase``.
            The whole shape is passed to the base constructor.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the default distribution head.
        zero_last_layer: Forwarded to ``DiagGaussian`` as ``zll`` for "Box"
            spaces.
        base: When not None it is stored as ``self.base`` as-is (it is not
            called and ``base_kwargs`` is not used).
        base_kwargs: Optional keyword arguments for a base built here.
        dist: When not None it replaces the default head, which has already
            been constructed by then.

    Raises:
        NotImplementedError: If no default base exists for ``obs_shape`` or
            the action space type is not handled.
    """
    super(Policy, self).__init__()
    extra = {} if base_kwargs is None else base_kwargs

    if base is not None:
        self.base = base
    else:
        if isinstance(obs_shape[0], tuple):
            base_cls = E2EBase
        elif len(obs_shape) == 3:
            base_cls = CNNBase
        elif len(obs_shape) == 1:
            base_cls = MLPBase
        else:
            raise NotImplementedError
        self.base = base_cls(obs_shape, **extra)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(self.base.output_size, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(self.base.output_size,
                                 action_space.shape[0],
                                 zll=zero_last_layer)
    elif space_kind == "MultiBinary":
        self.dist = Bernoulli(self.base.output_size, action_space.shape[0])
    else:
        raise NotImplementedError

    if dist is not None:
        self.dist = dist
def __init__(self, action_space, env_name, device):
    """Create a distribution head fed by a fixed random feature vector.

    Args:
        action_space: Stored on the instance; its class name ("Discrete",
            "Box" or "MultiBinary") selects the distribution head.
        env_name: Stored on the instance. If it contains ``'NoFrameskip'``
            the feature size is 1024, otherwise 64.
        device: Stored on the instance.

    Raises:
        NotImplementedError: If the action space type is not handled.
    """
    super(RandomAgent, self).__init__()
    self.action_space = action_space
    self.env_name = env_name
    self.device = device

    self.feature_size = 1024 if 'NoFrameskip' in self.env_name else 64

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, self.action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, self.action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, self.action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.feature_size, num_outputs)

    # Drawn once at construction, after the head has been built.
    self.actor_features = torch.rand(self.feature_size)
def __init__(self, obs_shape, action_space, architecture, state_channels, hidden_size, recurse_depth=1, pool_inject=False, **kwargs):
    """Build the conv encoder, the transition module, the critic and the head.

    Args:
        obs_shape: Three-entry shape. ``obs_shape[0]`` is the encoder's
            input channel count; ``obs_shape[1]`` must equal
            ``obs_shape[2]`` and be 84 or 64.
        action_space: Space whose class name ("Discrete", "Box" or
            "MultiBinary") selects the distribution head.
        architecture: ``"ff"``, ``"rnn"`` or ``"crnn"``; selects the
            transition module. ``is_recurrent`` is true for the last two.
        state_channels: Output channels of the last conv layer; also
            forwarded to the transition module.
        hidden_size: Stored as ``self.hidden_size``; sizes the transition
            module, the critic and the head.
        recurse_depth: Forwarded to the "rnn" and "crnn" transitions.
        pool_inject: Forwarded to the "crnn" transition only.
        **kwargs: Accepted but not used by this constructor.

    Raises:
        AssertionError: If ``obs_shape[1] != obs_shape[2]``.
        NotImplementedError: For an unsupported image size, architecture or
            action space type.
    """
    super(RecurrentPolicy, self).__init__()

    def relu_init(module):
        # Orthogonal weights scaled by the ReLU gain, zero bias.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0),
                    nn.init.calculate_gain('relu'))

    num_inputs = obs_shape[0]
    im_size = obs_shape[1]
    assert im_size == obs_shape[2]

    self.spatial_latent_size = (7, 7)
    self.hidden_size = hidden_size
    self.architecture = architecture
    self.is_recurrent = architecture in ("rnn", "crnn")

    # Layers are created strictly in order so that parameter
    # initialisation happens in the same sequence as before.
    if im_size == 84:
        encoder_layers = [
            relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)),
            nn.ReLU(),
            relu_init(nn.Conv2d(32, 64, 4, stride=2)),
            nn.ReLU(),
            relu_init(nn.Conv2d(64, state_channels, 3, stride=1)),
            nn.ReLU(),
        ]
    elif im_size == 64:
        # The original comments annotate the spatial sizes of this stack
        # as 64 -> 16 -> 9 -> 7.
        encoder_layers = [
            relu_init(nn.Conv2d(num_inputs, 32, 6, stride=4, padding=1)),
            nn.ReLU(),
            relu_init(nn.Conv2d(32, 64, 4, stride=2, padding=2)),
            nn.ReLU(),
            relu_init(nn.Conv2d(64, state_channels, 3, stride=1)),
            nn.ReLU(),
        ]
    else:
        raise NotImplementedError
    self.encoder = nn.Sequential(*encoder_layers)

    if architecture == "ff":
        self.transition = NoTransition(hidden_size,
                                       state_channels=state_channels)
    elif architecture == "rnn":
        self.transition = RNNTransition(hidden_size,
                                        state_channels=state_channels,
                                        recurse_depth=recurse_depth)
    elif architecture == "crnn":
        self.transition = CRNNTransition(hidden_size,
                                         state_channels=state_channels,
                                         recurse_depth=recurse_depth,
                                         pool_inject=pool_inject)
    else:
        raise NotImplementedError

    def plain_init(module):
        # Same initialiser as above but without the ReLU gain argument.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0))

    self.critic_linear = plain_init(nn.Linear(self.hidden_size, 1))

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        head_cls, num_outputs = Categorical, action_space.n
    elif space_kind == "Box":
        head_cls, num_outputs = DiagGaussian, action_space.shape[0]
    elif space_kind == "MultiBinary":
        head_cls, num_outputs = Bernoulli, action_space.shape[0]
    else:
        raise NotImplementedError
    self.dist = head_cls(self.hidden_size, num_outputs)

    self.train()