def build(self):
    """Create two linear layers and one DiagGaussian module per action key.

    Reads ``self.observation_space.shape[0]`` for the input size and
    ``self.action_space['torque']`` / ``self.action_space['position']`` for
    the sizes handed to the two ``DiagGaussian`` modules. Afterwards every
    parameter of each module's ``fc_mean`` is set to 0.
    """
    hidden = 256
    obs_dim = self.observation_space.shape[0]

    # Modules are created in the order fc1, fc2, torque, position.
    self.fc1 = nn.Linear(obs_dim, hidden)
    self.fc2 = nn.Linear(hidden, hidden)

    torque_dim = self.action_space['torque'].shape[0]
    self.dist_torque = DiagGaussian(hidden, torque_dim,
                                    constant_log_std=False)
    position_dim = self.action_space['position'].shape[0]
    self.dist_position = DiagGaussian(hidden, position_dim,
                                      constant_log_std=False)

    # Fill all fc_mean parameters with zeros, torque first, then position.
    for gaussian in (self.dist_torque, self.dist_position):
        for param in gaussian.fc_mean.parameters():
            nn.init.constant_(param, 0.)
def __init__(self, ob_shape, action_shape, nunits):
    """Create two linear layers followed by a DiagGaussian module.

    Args:
        ob_shape: Input size given to the first ``nn.Linear``; also stored
            on the instance as ``self.ob_shape``.
        action_shape: Second argument forwarded to ``DiagGaussian``.
        nunits: Output size of both linear layers and the first argument
            forwarded to ``DiagGaussian``; stored as ``self.nunits``.
    """
    super().__init__()
    self.nunits = nunits
    width = self.nunits

    self.fc1 = nn.Linear(ob_shape, width)
    self.fc2 = nn.Linear(width, width)
    self.dist = DiagGaussian(width, action_shape)

    self.ob_shape = ob_shape
def build(self):
    """Create the feed-forward net, the distribution module and ``vf``.

    ``self.net`` is a ``FeedForwardNet`` built from
    ``self.observation_space.shape[0]`` and the layer list ``[32, 32]``
    with ``activate_last=True``. ``self.dist`` is a ``Categorical`` when
    ``self.action_space`` has an ``n`` attribute, otherwise a
    ``DiagGaussian`` sized by ``self.action_space.shape[0]``. ``self.vf``
    is a linear layer from 32 inputs to 1 output.
    """
    hidden = 32
    obs_dim = self.observation_space.shape[0]
    self.net = FeedForwardNet(obs_dim, [hidden, hidden], activate_last=True)

    # The presence of ``n`` on the action space selects the module type.
    if not hasattr(self.action_space, 'n'):
        self.dist = DiagGaussian(hidden, self.action_space.shape[0])
    else:
        self.dist = Categorical(hidden, self.action_space.n)

    self.vf = torch.nn.Linear(hidden, 1)
def build(self):
    """Create the fc/dist layers and a separate set of ``vf_*`` layers.

    Both sets take ``self.observation_space.shape[0]`` inputs and use
    ``self.nunits`` as the layer width. ``self.dist`` is a ``DiagGaussian``
    sized by ``self.action_space.shape[0]``, and every parameter of its
    ``fc_mean`` is set to 0. The ``vf_*`` layers end in a single output
    (presumably the value-function branch -- confirm against ``forward``).
    """
    obs_dim = self.observation_space.shape[0]
    width = self.nunits

    # First set: two linear layers followed by the DiagGaussian module.
    self.fc1 = nn.Linear(obs_dim, width)
    self.fc2 = nn.Linear(width, width)
    self.dist = DiagGaussian(width, self.action_space.shape[0])
    for param in self.dist.fc_mean.parameters():
        nn.init.constant_(param, 0.)

    # Second set: its own layers, built after the first set.
    self.vf_fc1 = nn.Linear(obs_dim, width)
    self.vf_fc2 = nn.Linear(width, width)
    self.vf_out = nn.Linear(width, 1)
def build(self):
    """Create three-layer fc/dist and ``vf_*`` stacks of width 32.

    The input size is the sum of ``shape[0]`` of the first two entries of
    ``self.observation_space.spaces``. ``self.dist`` is a ``DiagGaussian``
    sized by ``self.action_space.shape[0]``, and every parameter of its
    ``fc_mean`` is set to 0. The ``vf_*`` layers end in a single output
    (presumably the value-function branch -- confirm against ``forward``).
    """
    hidden = 32
    spaces = self.observation_space.spaces
    obs_dim = spaces[0].shape[0] + spaces[1].shape[0]

    # First stack: fc1 -> fc2 -> fc3, then the DiagGaussian module.
    self.fc1 = nn.Linear(obs_dim, hidden)
    self.fc2 = nn.Linear(hidden, hidden)
    self.fc3 = nn.Linear(hidden, hidden)
    self.dist = DiagGaussian(hidden, self.action_space.shape[0])
    for param in self.dist.fc_mean.parameters():
        nn.init.constant_(param, 0.)

    # Second stack: separate layers with the same sizes and one output.
    self.vf_fc1 = nn.Linear(obs_dim, hidden)
    self.vf_fc2 = nn.Linear(hidden, hidden)
    self.vf_fc3 = nn.Linear(hidden, hidden)
    self.vf_out = nn.Linear(hidden, 1)
def build(self):
    """Create two 128-unit linear layers and a DiagGaussian module.

    The first layer takes ``self.observation_space.shape[0]`` inputs, and
    ``self.dist`` is sized by ``self.action_space.shape[0]``.
    """
    hidden = 128
    obs_dim = self.observation_space.shape[0]

    self.fc1 = nn.Linear(obs_dim, hidden)
    self.fc2 = nn.Linear(hidden, hidden)

    action_dim = self.action_space.shape[0]
    self.dist = DiagGaussian(hidden, action_dim)