def __init__(self, obs_shape, action_space, hidden_size=100, embed_size=0,
             base=None, base_kwargs=None):
    """Create the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        hidden_size: Forwarded positionally to the base constructor.
        embed_size: Forwarded positionally to the base constructor.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.

    Raises:
        NotImplementedError: If ``base`` is None and ``obs_shape`` has a
            length other than 1 or 3.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is None:
        obs_rank = len(obs_shape)
        if obs_rank not in (1, 3):
            raise NotImplementedError
        base = CNNBase if obs_rank == 3 else MLPBase

    self.base = base(obs_shape[0], obs_shape[1:], hidden_size, embed_size,
                     **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        # "Box" and every other space type share the diagonal Gaussian head.
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
def create_output_distribution(action_space, output_size):
    """Return the distribution head matching ``action_space``.

    The space is identified by its class name. ``Tuple`` spaces are handled
    recursively: one head is built per sub-space and the results are wrapped
    in a ``DistributionGeneratorTuple``.

    Args:
        action_space: Space object (or iterable of spaces for ``Tuple``).
        output_size: Size of the feature vector fed into the head.

    Returns:
        The constructed distribution object.

    Raises:
        NotImplementedError: If the space type is not one of Discrete, Box,
            MultiBinary, MultiDiscrete or Tuple.
    """
    space_kind = action_space.__class__.__name__

    if space_kind == "Discrete":
        n_choices = action_space.n
        return Categorical(output_size, n_choices)

    if space_kind == "Box":
        action_dim = action_space.shape[0]
        return DiagGaussian(output_size, action_dim)

    if space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        return Bernoulli(output_size, n_bits)

    if space_kind == "MultiDiscrete":
        # NOTE(review): MultiDiscrete is mapped to a Gaussian head here;
        # confirm this is intended rather than a categorical head.
        action_dim = action_space.shape[0]
        return DiagGaussian(output_size, action_dim)

    if space_kind == "Tuple":
        sub_dists = tuple(
            create_output_distribution(sub_space, output_size)
            for sub_space in action_space
        )
        return DistributionGeneratorTuple(sub_dists)

    raise NotImplementedError
def __init__(self, obs_shape, ac_shape, base=None, base_kwargs=None):
    """Create the feature base and a diagonal-Gaussian action head.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        ac_shape: Action shape; its first entry is the number of outputs.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.

    Raises:
        NotImplementedError: If ``base`` is None and ``obs_shape`` has a
            length other than 1 or 3.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is None:
        obs_rank = len(obs_shape)
        if obs_rank not in (1, 3):
            raise NotImplementedError
        base = CNNBase if obs_rank == 3 else MLPBase

    self.base = base(obs_shape[0], **base_kwargs)

    action_dim = ac_shape[0]
    self.dist = DiagGaussian(self.base.output_size, action_dim)
def __init__(self, obs_shape, action_space, is_leaf, base=None, base_kwargs=None):
    """Create an ``OpsBase`` feature extractor and the matching action head.

    Args:
        obs_shape: Observation shape; only its first entry is used.
        action_space: Space object; passed to ``OpsBase`` and used (by class
            name) to select the distribution.
        is_leaf: Forwarded to ``OpsBase`` as a keyword argument.
        base: Not used; ``OpsBase`` is always constructed.
        base_kwargs: Optional extra keyword arguments for ``OpsBase``.

    Raises:
        NotImplementedError: For space types other than Discrete, Box and
            MultiBinary.
    """
    super(OpsPolicy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    self.base = OpsBase(obs_shape[0], action_space, is_leaf=is_leaf,
                        **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Create the feature base and the action-distribution head.

    Besides Discrete, Box and MultiBinary (matched by class name), this
    variant supports ``gym.spaces.MultiDiscrete`` (matched by ``isinstance``)
    through ``MultiCategoricalDistribution``.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object that selects the distribution.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is None:
        obs_rank = len(obs_shape)
        if obs_rank not in (1, 3):
            raise NotImplementedError
        base = CNNBase if obs_rank == 3 else MLPBase

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    elif isinstance(action_space, gym.spaces.MultiDiscrete):
        # The head receives the summed size of all sub-actions plus the
        # per-dimension sizes themselves.
        self.dist = MultiCategoricalDistribution(
            self.base.output_size,
            int(np.sum(action_space.nvec)),
            action_space.nvec,
        )
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, agent_num, agent_i, base=None, base_kwargs=None):
    """Create a multi-agent feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        agent_num: Number of agents; forwarded positionally to the base.
        agent_i: Not used by this constructor.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is None:
        obs_rank = len(obs_shape)
        if obs_rank not in (1, 3):
            raise NotImplementedError
        base = CNNBase if obs_rank == 3 else MLPBase

    # Actor input dimension is num_state; critic input is
    # num_state * agent_num, hence agent_num is handed to the base.
    self.base = base(obs_shape[0], agent_num, **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Wrap an already-built base with an action head and a critic layer.

    Unlike constructors that instantiate a base class, ``base`` is stored
    as-is and must expose ``output_size``.

    Args:
        obs_shape: Not used by this constructor.
        action_space: Space object; its class name picks the distribution.
        base: Object stored directly as ``self.base``.
        base_kwargs: Normalized to a dict but not otherwise used.

    Raises:
        NotImplementedError: For space types other than Discrete, Box and
            MultiBinary.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    # NOTE(review): with the default base=None the attribute access below
    # fails; callers are expected to pass a constructed base.
    self.base = base

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError

    def _orthogonal_zero_bias(module):
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0))

    # NOTE(review): the critic input width is fixed at 512 rather than
    # derived from self.base.output_size -- confirm the base matches.
    self.critic_linear = _orthogonal_zero_bias(nn.Linear(512, 1))
def __init__(self, obs_shape, action_space, action_activation=None, base=None, base_kwargs=None):
    """Create an action-conditioned base and the action-distribution head.

    The base receives two extra keyword arguments derived from the action
    space: ``num_actions`` and an ``action_embedding`` callable (one-hot
    encoding for Discrete spaces, identity for Box spaces).

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object; only Discrete and Box are supported.
        action_activation: Forwarded to ``DiagGaussian`` as ``activation``
            for Box spaces.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Keyword arguments for the base. Must contain
            ``"hidden_size"`` and, for Discrete spaces, ``"is_ref"``. The
            dict is copied, so the caller's object is left untouched.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types (including MultiBinary).
        KeyError: If a required ``base_kwargs`` key is missing.
    """
    super(Policy, self).__init__()
    # Work on a private copy: the keys injected below must not leak into a
    # dict the caller may reuse for other constructors.
    base_kwargs = {} if base_kwargs is None else dict(base_kwargs)

    if base is None:
        if len(obs_shape) == 3:
            base = CNNBase
        elif len(obs_shape) == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n

        def action_embedding(actions):
            return torch.nn.functional.one_hot(
                actions, num_classes=num_outputs).squeeze(-2)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]

        def action_embedding(actions):
            return actions
    else:
        # MultiBinary has no action embedding defined, so it is rejected
        # together with every other unsupported space type.
        raise NotImplementedError

    base_kwargs["num_actions"] = num_outputs
    base_kwargs["action_embedding"] = action_embedding

    self.ob_dim = obs_shape[0]
    self.h_dim = base_kwargs["hidden_size"]
    self.ac_dim = num_outputs

    self.base = base(obs_shape[0], **base_kwargs)

    # Only Discrete and Box can reach this point.
    if space_kind == "Discrete":
        self.dist = Categorical(self.base.output_size, num_outputs,
                                is_ref=base_kwargs["is_ref"])
    else:
        self.dist = DiagGaussian(self.base.output_size, num_outputs,
                                 activation=action_activation)

    self.obs_rms = None
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Create the feature base and the action-distribution head.

    The string ``'Mnist'`` is accepted as a shorthand for ``CNNMnist``. The
    base is constructed from the full ``obs_shape`` rather than only its
    first entry.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        base: Base constructor, the string ``'Mnist'``, or None.
        base_kwargs: Optional extra keyword arguments for the base.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base == 'Mnist':
        base = CNNMnist

    if base is None:
        obs_rank = len(obs_shape)
        if obs_rank not in (1, 3):
            raise NotImplementedError
        base = CNNBase if obs_rank == 3 else MLPBase

    # The whole observation shape is passed here; the upstream version
    # passed only obs_shape[0].
    self.base = base(obs_shape, **base_kwargs)
    print("obs_shape[0]:", obs_shape[0])

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Create the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional keyword arguments for the base. A falsy or
            missing ``'is_genesis'`` entry is not forwarded to the base. The
            dict is copied, so the caller's object is left untouched.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    # Copy so that dropping 'is_genesis' below does not alter the caller's
    # dict (which would break a second construction with the same kwargs).
    base_kwargs = {} if base_kwargs is None else dict(base_kwargs)

    if base is None:
        if len(obs_shape) == 3:
            base = CNNBase
        elif len(obs_shape) == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    # This key is needed to configure the CNN correctly for Genesis, but
    # causes problems with other domains. Tolerate its absence instead of
    # raising KeyError (e.g. when base_kwargs was left at its default).
    if not base_kwargs.get('is_genesis', False):
        base_kwargs.pop('is_genesis', None)

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif space_kind == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, feat_dim, action_space, base=None, base_kwargs=None):
    """Create the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` (``feat_dim == 0``) or ``CombinedBase``
            (otherwise), and a length of 1 selects ``MLPBase``.
        feat_dim: Extra feature size handed to ``CombinedBase``.
        action_space: Space object; its class name picks the distribution.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}

    if base is None:
        if len(obs_shape) == 3:
            if feat_dim == 0:
                self.base = CNNBase(obs_shape[0], **base_kwargs)
            else:
                self.base = CombinedBase(obs_shape[0], feat_dim, **base_kwargs)
        elif len(obs_shape) == 1:
            self.base = MLPBase(obs_shape[0], **base_kwargs)
        else:
            raise NotImplementedError
    else:
        # A caller-supplied base used to be ignored, leaving self.base unset
        # and failing on the first self.base access below.
        # NOTE(review): assumes custom bases take (num_inputs, **kwargs) like
        # CNNBase/MLPBase above -- confirm against the intended callers.
        self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical_masked(self.base.output_size, num_outputs)
        self.act_dim = action_space.n
        # Log of the number of actions, stored for later entropy scaling.
        self.ent_denom = math.log(self.act_dim)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif space_kind == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, model='base'):
    """Create the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects a CNN (see ``model``) and a length of 1 selects
            ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.
        model: For image observations, ``'base'`` selects ``CNNBase`` and
            ``'resnet'`` selects ``CNNDeep``.

    Raises:
        Exception: If ``model`` is not recognised for image observations.
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is None:
        obs_rank = len(obs_shape)
        if obs_rank == 3:
            if model == 'base':
                base = CNNBase
            elif model == 'resnet':
                base = CNNDeep
            else:
                raise Exception('Model not implemented')
        elif obs_rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Create the feature base, the action head and auxiliary state.

    Besides the usual base/distribution pair, this constructor initialises
    the bookkeeping attributes ``tt``, ``nn``, ``visionmodel`` and
    ``knob_target_hist``.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}

    if base is None:
        if len(obs_shape) == 3:
            base = CNNBase
        elif len(obs_shape) == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    self.tt = 0
    self.nn = 0
    self.visionmodel = None

    # Keep the history tensor on the GPU when one exists (as before), but
    # fall back to the CPU instead of crashing on CUDA-less machines.
    knob_hist = torch.zeros(1, 3)
    if torch.cuda.is_available():
        knob_hist = knob_hist.cuda()
    self.knob_target_hist = knob_hist

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif space_kind == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, base_encoder='simple'):
    """Create the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase``; a length of 1 selects an encoder according
            to ``base_encoder``.
        action_space: Space object; its class name picks the distribution.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.
        base_encoder: Name matched by substring, in this order: ``'mlp'``,
            ``'deep'``, ``'attn'``, ``'special'``, ``'MHSA'``.

    Raises:
        NotImplementedError: For unsupported observation ranks, an
            unrecognised ``base_encoder``, or unsupported action spaces.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}

    if base is None:
        if len(obs_shape) == 3:
            base = CNNBase
        elif len(obs_shape) == 1:
            if 'mlp' in base_encoder:
                base = MLPBase
            elif 'deep' in base_encoder:
                base = DeepMLPBase
            elif 'attn' in base_encoder:
                base = AttnMLP
            elif 'special' in base_encoder:
                base = SpecialMLP
            elif 'MHSA' in base_encoder:
                base = MHeadAttnModel
            else:
                # Fail explicitly rather than leaving base as None and
                # hitting "'NoneType' object is not callable" below.
                raise NotImplementedError(
                    f"Unsupported base_encoder: {base_encoder!r}")
        else:
            # Same reasoning for observation ranks other than 1 or 3.
            raise NotImplementedError(
                f"Unsupported observation shape: {obs_shape!r}")

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif space_kind == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, is_minigrid=False, use_rew=False):
    """Create the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNN_minigrid`` (if ``is_minigrid``) or ``CNNBase``,
            and a length of 1 selects ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.
        is_minigrid: Chooses the MiniGrid CNN for image observations.
        use_rew: Stored unchanged on ``self.use_rew``.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is None:
        obs_rank = len(obs_shape)
        if obs_rank == 3:
            base = CNN_minigrid if is_minigrid else CNNBase
        elif obs_rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    self.base = base(obs_shape[0], **base_kwargs)
    self.use_rew = use_rew

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Create an ``Adaptor``-wrapped base and the action-distribution head.

    The chosen base class is handed to ``Adaptor`` together with the full
    observation shape, and a fresh linear critic layer is attached to the
    resulting module.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for ``Adaptor``.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is None:
        obs_rank = len(obs_shape)
        if obs_rank not in (1, 3):
            raise NotImplementedError
        base = CNNBase if obs_rank == 3 else MLPBase

    adaptor = Adaptor(base=base, obs_shape=obs_shape, n_classes=512,
                      **base_kwargs)
    self.base = adaptor
    self.base.critic_linear = nn.Linear(self.base.output_size, 1)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, key_value = 0):
    """Create the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. When no base is forced or given, a
            length of 3 selects ``CNNBase`` and a length of 1 selects
            ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        base: Optional base constructor; ignored when ``key_value == 1``.
        base_kwargs: Optional extra keyword arguments for the base.
        key_value: When equal to 1, ``KeyValueBase`` is used regardless of
            ``base``.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if key_value == 1:
        base = KeyValueBase
    elif base is None:
        obs_rank = len(obs_shape)
        if obs_rank == 3:
            # Swap CNNBase for RLINE here if RLINE is wanted.
            base = CNNBase
        elif obs_rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_space, obs_process, obs_module, action_space, base_kwargs=None):
    """Create an ``NNBase2`` feature extractor, a critic and an action head.

    Args:
        obs_space: Stored on the instance and forwarded to ``NNBase2``.
        obs_process: Stored on the instance and forwarded to ``NNBase2``.
        obs_module: Stored on the instance and forwarded to ``NNBase2``.
        action_space: Space object; its class name picks the distribution.
        base_kwargs: Optional extra keyword arguments for ``NNBase2``.

    Raises:
        NotImplementedError: For space types other than Discrete, Box and
            MultiBinary.
    """
    super(Policy, self).__init__()
    self.obs_space = obs_space
    self.obs_process = obs_process
    self.obs_module = obs_module
    base_kwargs = {} if base_kwargs is None else base_kwargs

    # The base takes all of the observations and produces a single feature
    # vector.
    self.base = NNBase2(obs_space, obs_process, obs_module, **base_kwargs)

    def _orthogonal_zero_bias(module):
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0))

    self.critic_linear = _orthogonal_zero_bias(
        nn.Linear(self.base.output_size, 1))

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Create a distribution head and a base over (action, 2x observation).

    The base input width is the action size plus twice ``obs_shape[0]``;
    the distribution head reads from a fixed hidden width of 256.

    Args:
        obs_shape: Observation shape; only its first entry is used.
        action_space: Space object; its class name picks the distribution.
        base: Optional base constructor; defaults to ``MLPBase``.
        base_kwargs: Normalized to a dict but not otherwise used.

    Raises:
        NotImplementedError: For space types other than Discrete, Box and
            MultiBinary.
    """
    super(APolicy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs
    base = MLPBase if base is None else base

    self.hidden_size = 256
    doubled_obs = obs_shape[0] * 2

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        in_width = action_space.n + doubled_obs
        out_width = action_space.n
        self.dist = Categorical(self.hidden_size, out_width)
    elif space_kind == "Box":
        in_width = action_space.shape[0] + doubled_obs
        out_width = action_space.shape[0]
        self.dist = DiagGaussian(self.hidden_size, out_width)
    elif space_kind == "MultiBinary":
        in_width = action_space.shape[0] + doubled_obs
        out_width = action_space.shape[0]
        self.dist = Bernoulli(self.hidden_size, out_width)
    else:
        raise NotImplementedError

    # The head is registered before the base, as in the original layout.
    self.base = base(in_width, out_width)
def __init__(self, action_space, base=None, base_kwargs=None):
    """Create an ``MLPBase`` and a fixed pair of two-output action heads.

    The observation size is read via ``get_v('obs_size')``. ``self.dist``
    is a ``ModuleList`` holding a ``DiagGaussian`` followed by a
    ``Categorical``, each with two outputs.

    Args:
        action_space: Not used by this constructor.
        base: Ignored; ``MLPBase`` is always used.
        base_kwargs: Optional extra keyword arguments for ``MLPBase``.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    base = MLPBase
    self.base = base(get_v('obs_size'), **base_kwargs)

    gaussian_head = DiagGaussian(self.base.output_size, 2)
    categorical_head = Categorical(self.base.output_size, 2)
    self.dist = nn.ModuleList([gaussian_head, categorical_head])
def __init__(self, num_inputs, num_outputs, base=None, base_kwargs=None):
    """Create a base over ``num_inputs`` and a diagonal-Gaussian head.

    Args:
        num_inputs: Input size passed to the base constructor.
        num_outputs: Number of outputs of the Gaussian head.
        base: Optional base constructor; defaults to ``MLPBase``.
        base_kwargs: Optional extra keyword arguments for the base.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs
    base = MLPBase if base is None else base

    self.base = base(num_inputs, **base_kwargs)
    self.dist = DiagGaussian(self.base.output_size, num_outputs)
def __init__(self, obs_shape, action_space, hidden_size):
    """Create a distribution head plus linear base, value and action layers.

    The distribution head maps ``num_outputs`` inputs to ``num_outputs``
    outputs. For space types other than Discrete, Box and MultiBinary no
    ``self.dist`` is created and no error is raised.

    Args:
        obs_shape: Observation shape; its first entry is the base input size.
        action_space: Space object; its class name picks the distribution.
        hidden_size: Output size of the base and input size of both heads.
    """
    super(Policy, self).__init__()

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        width = action_space.n
        self.dist = Categorical(width, width)
    elif space_kind == "Box":
        width = action_space.shape[0]
        self.dist = DiagGaussian(width, width)
    elif space_kind == "MultiBinary":
        width = action_space.shape[0]
        self.dist = Bernoulli(width, width)

    obs_dim = obs_shape[0]
    self.base = nn.Linear(obs_dim, hidden_size)
    self.value_head = nn.Linear(hidden_size, 1)
    # NOTE(review): the action head is fixed at two outputs while the
    # distribution above expects `width` inputs -- confirm the intent.
    self.action_head = nn.Linear(hidden_size, 2)
def __init__(self, obs_shape, action_space, other_cars=False, ego_dim=None, beta_dist=False, base=None, base_kwargs=None):
    """Create the feature base and the action-distribution head.

    For Box spaces the head is either a ``BetaDist`` (with a stored entropy
    reference taken from ``Beta(20, 20)``) or a ``DiagGaussian``, and the
    action bounds are stored on ``hi_lim`` / ``lo_lim``.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        other_cars: Forwarded to the base as a keyword argument.
        ego_dim: Forwarded to the base as a keyword argument.
        beta_dist: For Box spaces, use a Beta head instead of a Gaussian.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is None:
        obs_rank = len(obs_shape)
        if obs_rank not in (1, 3):
            raise NotImplementedError
        base = CNNBase if obs_rank == 3 else MLPBase

    self.base = base(obs_shape[0], other_cars=other_cars, ego_dim=ego_dim,
                     **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.beta_dist = beta_dist
        if not self.beta_dist:
            self.dist = DiagGaussian(self.base.output_size, action_dim)
        else:
            self.dist = BetaDist(self.base.output_size, action_dim)
            reference_entropy = torch.distributions.Beta(20, 20).entropy()
            self.entropy_lb = Variable(reference_entropy.float())
        self.hi_lim = action_space.high
        self.lo_lim = action_space.low
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, IAM=False, RNN=False, base_kwargs=None):
    """Create the feature base and the action-distribution head.

    The base is picked from the observation rank and the ``IAM`` / ``RNN``
    flags, and the choice is printed. In this variant ``output_size`` of
    the base is called as a method.

    Args:
        obs_shape: Observation shape; a length of 3 selects a CNN-style
            base and a length of 1 a vector base.
        action_space: Space object; its class name picks the distribution.
        IAM: Selects ``IAMBaseCNN`` / ``IAMBase``; stored on ``self.IAM``.
        RNN: For vector observations without ``IAM``, selects ``RNNBase``;
            stored on ``self.recurrent``.
        base_kwargs: Optional extra keyword arguments for the base.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(IAMPolicy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    self.IAM = IAM
    self.recurrent = RNN

    obs_rank = len(obs_shape)
    if obs_rank == 3:
        if self.IAM:
            print("Using IAMBaseCNN")
            base = IAMBaseCNN
        else:
            print("Using CNNBase")
            base = CNNBase
    elif obs_rank == 1:
        if self.IAM:
            print("Using IAMBase")
            base = IAMBase
        elif self.recurrent:
            print("Using RNNBase")
            base = RNNBase
        else:
            print("Using MLPBase")
            base = MLPBase
    else:
        raise NotImplementedError

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        print("discrete")
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size(), n_choices)
    elif space_kind == "Box":
        print("Box")
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size(), action_dim)
    elif space_kind == "MultiBinary":
        print("MultiBinary")
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size(), n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, navi=False, hidden_size=64, n_layers=2):
    """Create the feature base and the action-distribution head.

    Args:
        obs_shape: Observation shape. When ``base`` is None, a length of 3
            selects ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.
        navi: For Discrete spaces, feed the head ``256 * 10`` features
            instead of ``self.base.output_size``.
        hidden_size: Forwarded to the base as a keyword argument.
        n_layers: Forwarded to the base as a keyword argument.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}

    if base is None:
        if len(obs_shape) == 3:
            # TODO(add hidden size)
            base = CNNBase
        elif len(obs_shape) == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    # `base` is normally a class, so type(base).__name__ would only ever
    # report the metaclass ("type"); prefer the base's own name and fall
    # back to its type for callables without __name__.
    base_name = getattr(base, "__name__", type(base).__name__)
    print("DEV: PPO using base:", base_name)

    self.base = base(obs_shape[0], hidden_size=hidden_size,
                     n_layers=n_layers, **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        net_outputs = self.base.output_size
        if navi:
            net_outputs = 256 * 10
        self.dist = Categorical(net_outputs, num_outputs)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif space_kind == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Create a named feature base and the action-distribution head.

    ``base`` is a string key here: ``'mlp'``, ``'shared'`` or ``'osc'``.
    Every base is built with twice ``obs_shape[0]`` inputs because the
    previous observation is appended to the current one.

    Args:
        obs_shape: Observation shape; only its first entry is used.
        action_space: Space object; its class name picks the distribution.
        base: One of ``'mlp'``, ``'shared'``, ``'osc'``.
        base_kwargs: Optional extra keyword arguments for the base.

    Raises:
        NotImplementedError: For an unknown ``base`` key (including the
            default None) or an unsupported action space type.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base == 'mlp':
        base_cls = MLPBase
    elif base == 'shared':
        base_cls = SharedBase
    elif base == 'osc':
        # For this base the previous observation includes sim time.
        base_cls = OscBase
    else:
        raise NotImplementedError

    # Input is doubled: previous observation is added to the input.
    self.base = base_cls(obs_shape[0] * 2, **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Create a Gaussian head over observations and an (action, obs) base.

    The base input width is the action size plus ``obs_shape[0]``; both the
    base's second argument and the Gaussian head's output size are
    ``obs_shape[0]``. The head reads from a fixed hidden width of 128.

    Args:
        obs_shape: Observation shape; only its first entry is used.
        action_space: Space object; its class name determines how the
            action size is read.
        base: Optional base constructor; defaults to ``MLPBase``.
        base_kwargs: Normalized to a dict but not otherwise used.

    Raises:
        NotImplementedError: For space types other than Discrete, Box and
            MultiBinary.
    """
    super(Model, self).__init__()
    self.hidden_size = 128
    base_kwargs = {} if base_kwargs is None else base_kwargs
    base = MLPBase if base is None else base

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        action_width = action_space.n
    elif space_kind in ("Box", "MultiBinary"):
        action_width = action_space.shape[0]
    else:
        raise NotImplementedError
    in_width = action_width + obs_shape[0]

    out_width = obs_shape[0]
    # The head is registered before the base, as in the original layout.
    self.dist = DiagGaussian(self.hidden_size, out_width)
    self.base = base(in_width, obs_shape[0])
def __init__(self, obs_shape, action_space, zero_last_layer=False, base=None, base_kwargs=None, dist=None):
    """Create or adopt a feature base and set up the action head.

    When ``base`` is None a base class is chosen from ``obs_shape`` and
    instantiated with the full shape; otherwise ``base`` is stored as-is.
    A supplied ``dist`` replaces the automatically built head at the end.

    Args:
        obs_shape: Observation shape. If its first entry is a tuple,
            ``E2EBase`` is used; otherwise a length of 3 selects ``CNNBase``
            and a length of 1 selects ``MLPBase``.
        action_space: Space object; its class name picks the distribution.
        zero_last_layer: Forwarded to ``DiagGaussian`` as ``zll``.
        base: Optional object stored directly as ``self.base``.
        base_kwargs: Optional keyword arguments for an automatically
            chosen base.
        dist: Optional distribution overriding the automatically built one.

    Raises:
        NotImplementedError: For unsupported observation shapes or action
            space types.
    """
    super(Policy, self).__init__()
    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is not None:
        self.base = base
    else:
        if isinstance(obs_shape[0], tuple):
            base_cls = E2EBase
        elif len(obs_shape) == 3:
            base_cls = CNNBase
        elif len(obs_shape) == 1:
            base_cls = MLPBase
        else:
            raise NotImplementedError
        self.base = base_cls(obs_shape, **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.base.output_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, action_dim,
                                 zll=zero_last_layer)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, n_bits)
    else:
        raise NotImplementedError

    if dist is not None:
        self.dist = dist
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None,
             load_expert=None, env_name=None, rl_baseline_zoo_dir=None,
             expert_algo=None, normalize=True):
    """Create the policy and optionally initialise it from an expert model.

    Args:
        obs_shape: Observation shape. When ``base`` is None and the
            environment is not ``'duckietown'``, a length of 3 selects
            ``CNNBase`` and a length of 1 selects ``MLPBase``.
        action_space: Space object; only Discrete and Box are supported.
        base: Optional base constructor overriding the automatic choice.
        base_kwargs: Optional extra keyword arguments for the base.
        load_expert: When equal to True (and the environment is not
            ``'duckietown'`` or ``'highway-v0'``), expert weights are loaded
            from a stable-baselines model file.
        env_name: Environment name; selects the base for ``'duckietown'``
            and the model file / algorithm when loading an expert.
        rl_baseline_zoo_dir: Root directory containing ``trained_agents``.
        expert_algo: Sub-directory of ``trained_agents`` to load from.
        normalize: Forwarded to the base constructor.

    Raises:
        NotImplementedError: For unsupported observation ranks or action
            space types, or when an expert is requested for a base other
            than ``CNNBase`` / ``MLPBase``.
        Exception: For MultiBinary action spaces.
        ImportError: If the TRPO expert is required but cannot be imported.
    """
    super(Policy, self).__init__()

    # TODO: Pass these parameters in
    self.epsilon = 0.1
    self.dril = True

    if base_kwargs is None:
        base_kwargs = {}

    if base is None:
        if env_name in ['duckietown']:
            base = DuckieTownCNN
        elif len(obs_shape) == 3:
            base = CNNBase
        elif len(obs_shape) == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    self.base = base(obs_shape[0], normalize=normalize, **base_kwargs)

    # After construction self.action_space holds the space *name*.
    self.action_space = None
    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
        self.action_space = "Discrete"
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
        self.action_space = "Box"
    elif space_kind == "MultiBinary":
        raise Exception('Error')
    else:
        raise NotImplementedError

    if load_expert == True and env_name not in ['duckietown', 'highway-v0']:
        print('[Loading Expert --- Base]')
        model_path = os.path.join(rl_baseline_zoo_dir, 'trained_agents',
                                  f'{expert_algo}')
        try:
            # The mpi4py import is attempted together with TRPO, as in the
            # original; either failing leaves TRPO unavailable.
            import mpi4py  # noqa: F401
            from stable_baselines import TRPO
        except ImportError:
            TRPO = None

        from stable_baselines import PPO2

        model_path = f'{model_path}/{env_name}.pkl'
        if env_name in ['AntBulletEnv-v0']:
            if TRPO is None:
                # Previously this surfaced as an opaque AttributeError on
                # None.load().
                raise ImportError(
                    'TRPO (stable_baselines with mpi4py) is required to '
                    f'load the expert for {env_name}')
            baselines_model = TRPO.load(model_path)
        else:
            baselines_model = PPO2.load(model_path)

        for key, value in baselines_model.get_parameters().items():
            print(key, value.shape)

        if base.__name__ == 'CNNBase':
            print(['Loading CNNBase expert model'])
            params = copy_cnn_weights(baselines_model)
        elif base.__name__ == 'MLPBase':
            print(['Loading MLPBase expert model'])
            params = copy_mlp_weights(baselines_model)
        else:
            # Without this branch `params` stayed unbound and the failure
            # only showed up later as an UnboundLocalError.
            raise NotImplementedError(
                f'No expert weight converter for base {base.__name__!r}')

        # If the converted weights do not fit the current base, retry with
        # one extra input feature.
        # NOTE(review): the rebuilt base does not receive `normalize`, as in
        # the original -- confirm whether that is intended.
        try:
            self.load_state_dict(params)
            self.obs_shape = obs_shape[0]
        except Exception:
            self.base = base(obs_shape[0] + 1, **base_kwargs)
            self.load_state_dict(params)
            self.obs_shape = obs_shape[0] + 1
def __init__(self, obs_shape, action_space, architecture, state_channels,
             hidden_size, recurse_depth=1, pool_inject=False, **kwargs):
    """Create the conv encoder, transition module, critic and action head.

    Args:
        obs_shape: ``(channels, height, width)`` with height equal to
            width; only side lengths 84 and 64 are supported.
        action_space: Space object; its class name picks the distribution.
        architecture: ``"ff"``, ``"rnn"`` or ``"crnn"``; selects the
            transition module and sets ``is_recurrent``.
        state_channels: Output channels of the last encoder convolution;
            also forwarded to the transition module.
        hidden_size: Feature size read by the critic and the action head.
        recurse_depth: Forwarded to the recurrent transition modules.
        pool_inject: Forwarded to ``CRNNTransition``.
        **kwargs: Accepted and ignored.

    Raises:
        AssertionError: If the observation is not square.
        NotImplementedError: For unsupported image sizes, architectures or
            action space types.
    """
    super(RecurrentPolicy, self).__init__()

    def relu_init(module):
        # Orthogonal weights scaled by the ReLU gain, zero bias.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0),
                    nn.init.calculate_gain('relu'))

    def plain_init(module):
        # Orthogonal weights, zero bias.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0))

    num_inputs, im_size = obs_shape[0], obs_shape[1]
    assert im_size == obs_shape[2]

    self.spatial_latent_size = (7, 7)
    self.hidden_size = hidden_size
    self.architecture = architecture
    self.is_recurrent = architecture in ["rnn", "crnn"]

    # Both layouts reduce the image to a 7x7 map: 84 -> 20 -> 9 -> 7 and
    # 64 -> 16 -> 9 -> 7.
    if im_size == 84:
        conv1_cfg = {"kernel_size": 8, "stride": 4}
        conv2_cfg = {"kernel_size": 4, "stride": 2}
    elif im_size == 64:
        conv1_cfg = {"kernel_size": 6, "stride": 4, "padding": 1}
        conv2_cfg = {"kernel_size": 4, "stride": 2, "padding": 2}
    else:
        raise NotImplementedError

    self.encoder = nn.Sequential(
        relu_init(nn.Conv2d(num_inputs, 32, **conv1_cfg)),
        nn.ReLU(),
        relu_init(nn.Conv2d(32, 64, **conv2_cfg)),
        nn.ReLU(),
        relu_init(nn.Conv2d(64, state_channels, 3, stride=1)),
        nn.ReLU(),
    )

    if architecture == "ff":
        self.transition = NoTransition(hidden_size,
                                       state_channels=state_channels)
    elif architecture == "rnn":
        self.transition = RNNTransition(hidden_size,
                                        state_channels=state_channels,
                                        recurse_depth=recurse_depth)
    elif architecture == "crnn":
        self.transition = CRNNTransition(hidden_size,
                                         state_channels=state_channels,
                                         recurse_depth=recurse_depth,
                                         pool_inject=pool_inject)
    else:
        raise NotImplementedError

    self.critic_linear = plain_init(nn.Linear(self.hidden_size, 1))

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        n_choices = action_space.n
        self.dist = Categorical(self.hidden_size, n_choices)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        self.dist = DiagGaussian(self.hidden_size, action_dim)
    elif space_kind == "MultiBinary":
        n_bits = action_space.shape[0]
        self.dist = Bernoulli(self.hidden_size, n_bits)
    else:
        raise NotImplementedError

    self.train()